Initial Linux ZFS GIT Repo

This commit is contained in:
Brian Behlendorf
2008-11-20 12:01:55 -08:00
commit 34dc7c2f25
444 changed files with 187636 additions and 0 deletions
+16
View File
@@ -0,0 +1,16 @@
# Top-level makefile: every target below is delegated to the kernel build
# system found at @LINUX@, pointed back at this directory via SUBDIRS.
# NOTE(review): the @...@ tokens look like configure-time substitutions;
# confirm against the configure script.

# Subdirectories for the kernel build to descend into.
subdir-m += lib
subdir-m += zcmd
all:
# Make the exported SPL symbols available to this module. There
# is probably a better way to do this, but this will have to do
# for now... an option to modpost perhaps.
cp @splsymvers@ .
# Kick off the kernel build system
$(MAKE) -C @LINUX@ SUBDIRS=`pwd` @KERNELMAKE_PARAMS@ modules
# Forward the maintenance targets unchanged; $@ expands to the name of the
# target being built.
install uninstall clean distclean maintainer-clean distdir:
$(MAKE) -C @LINUX@ SUBDIRS=`pwd` @KERNELMAKE_PARAMS@ $@
# The check target has no recipe at this level.
check:
+12
View File
@@ -0,0 +1,12 @@
# Library subdirectories to descend into. The trailing comment on each
# line says what the library provides and whether it is built for user
# space, the kernel, or both.
subdir-m += libuutil # User space util support
subdir-m += libumem # User space memory support
subdir-m += libzfs # User space library support
subdir-m += libsolcompat # User space compatibility library
subdir-m += libzpool # Kernel DMU/SPA
subdir-m += libdmu-ctl # Kernel control interface
subdir-m += libavl # Kernel + user space AVL tree support
subdir-m += libnvpair # Kernel + user space name/value support
subdir-m += libzcommon # Kernel + user space common support
subdir-m += libport # Kernel + user space linux support
+31
View File
@@ -0,0 +1,31 @@
# Build description for the AVL tree code. The same source is built twice:
# once as a kernel module (kavl.o) and once as a user space shared library
# (uavl.o).
subdir-m += include
# NOTE(review): DISTFILES presumably lists the sources shipped in the
# distribution tarball; confirm against the top-level dist rules.
DISTFILES = avl.c
# Names reused below: MODULE for the kernel module, LIBRARY for the
# user space shared library.
MODULE := zavl
LIBRARY := libavl
# Compile as kernel module. Needed symlinks created for all
# k* objects created by top level configure script.
EXTRA_CFLAGS = @KERNELCPPFLAGS@
EXTRA_CFLAGS += -I@LIBDIR@/libavl/include
obj-m := ${MODULE}.o
${MODULE}-objs += kavl.o # Generic AVL support
# Compile as shared library. There's an extra useless host program
# here called 'zu' because it was the easiest way I could convince
# the kernel build system to construct a user space shared library.
HOSTCFLAGS += @HOSTCFLAGS@
HOSTCFLAGS += -I@LIBDIR@/libsolcompat/include
HOSTCFLAGS += -I@LIBDIR@/libport/include
HOSTCFLAGS += -I@LIBDIR@/libavl/include
hostprogs-y := zu
# Request that the host program is built even though nothing else
# depends on it.
always := $(hostprogs-y)
# 'zu' is made from zu.o plus the shared library, which is what causes
# the library itself to be built.
zu-objs := zu.o ${LIBRARY}.so
${LIBRARY}-objs += uavl.o
+969
View File
@@ -0,0 +1,969 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* AVL - generic AVL tree implementation for kernel use
*
* A complete description of AVL trees can be found in many CS textbooks.
*
* Here is a very brief overview. An AVL tree is a binary search tree that is
* almost perfectly balanced. By "almost" perfectly balanced, we mean that at
* any given node, the left and right subtrees are allowed to differ in height
* by at most 1 level.
*
* This relaxation from a perfectly balanced binary tree allows doing
* insertion and deletion relatively efficiently. Searching the tree is
* still a fast operation, roughly O(log(N)).
*
* The key to insertion and deletion is a set of tree manipulations called
* rotations, which bring unbalanced subtrees back into the semi-balanced state.
*
* This implementation of AVL trees has the following peculiarities:
*
* - The AVL specific data structures are physically embedded as fields
* in the "using" data structures. To maintain generality the code
* must constantly translate between "avl_node_t *" and containing
* data structure "void *"s by adding/subtracting the avl_offset.
*
* - Since the AVL data is always embedded in other structures, there is
* no locking or memory allocation in the AVL routines. This must be
* provided for by the enclosing data structure's semantics. Typically,
* avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
* exclusive write lock. Other operations require a read lock.
*
* - The implementation uses iteration instead of explicit recursion,
* since it is intended to run on limited size kernel stacks. Since
* there is no recursion stack present to move "up" in the tree,
* there is an explicit "parent" link in the avl_node_t.
*
* - The left/right children pointers of a node are in an array.
* In the code, variables (instead of constants) are used to represent
* left and right indices. The implementation is written as if it only
* dealt with left handed manipulations. By changing the value assigned
* to "left", the code also works for right handed trees. The
* following variables/terms are frequently used:
*
* int left; // 0 when dealing with left children,
* // 1 for dealing with right children
*
* int left_heavy; // -1 when left subtree is taller at some node,
* // +1 when right subtree is taller
*
* int right; // will be the opposite of left (0 or 1)
* int right_heavy;// will be the opposite of left_heavy (-1 or 1)
*
* int direction; // 0 for "<" (ie. left child); 1 for ">" (right)
*
* Though it is a little more confusing to read the code, the approach
* allows using half as much code (and hence cache footprint) for tree
* manipulations and eliminates many conditional branches.
*
* - The avl_index_t is an opaque "cookie" used to find nodes at or
* adjacent to where a new value would be inserted in the tree. The value
* is a modified "avl_node_t *". The bottom bit (normally 0 for a
* pointer) is set to indicate that the new node has a value greater
* than the value of the indicated "avl_node_t *".
*/
#include <sys/types.h>
#include <sys/param.h>
#include <sys/debug.h>
#include <sys/avl.h>
#include <sys/cmn_err.h>
/*
 * Small arrays to translate between balance (or diff) values and child
 * indices.
 *
 * Code that deals with binary tree data structures will randomly use
 * left and right children when examining a tree.  C "if()" statements
 * which evaluate randomly suffer from very poor hardware branch prediction.
 * In this code we avoid some of the branch mispredictions by using the
 * following translation arrays. They replace random branches with an
 * additional memory reference. Since the translation arrays are both very
 * small the data should remain efficiently in cache.
 *
 * avl_child2balance[] is indexed by a child index (0 = left, 1 = right)
 * and yields the balance contribution of that side (-1 or +1).
 *
 * avl_balance2child[] is indexed by "1 + value", where value is a balance
 * or comparison result in the range -1..+1, and yields the child index to
 * use: -1 and 0 select child 0, +1 selects child 1.
 */
static const int avl_child2balance[2] = {-1, 1};
static const int avl_balance2child[] = {0, 0, 1};
/*
 * Walk from one node to its in-order neighbor.  "left" names the child
 * index to walk toward: 0 visits the previous (lower) valued node and
 * 1 the next (higher) valued one.  Described for left == 0, at any given
 * node we do one of 2 things:
 *
 * - If there is a left child, go to it, then to its rightmost descendant.
 *
 * - Otherwise we return through parent nodes until we've come from a
 *   right child.
 *
 *	tree    - the tree "oldnode" belongs to
 *	oldnode - user data structure to start from, may be NULL
 *	left    - child index giving the direction of the walk (0 or 1)
 *
 * Return Value:
 *	NULL - if "oldnode" is NULL or there is no further node that way
 *	otherwise the neighboring node's data structure
 */
void *
avl_walk(avl_tree_t *tree, void *oldnode, int left)
{
	size_t off = tree->avl_offset;
	avl_node_t *node;
	int right = 1 - left;
	int was_child;

	/*
	 * Nowhere to walk to without a starting node.  The caller's pointer
	 * has to be tested before it is converted: AVL_DATA2NODE() adjusts
	 * it by avl_offset, after which a NULL "oldnode" no longer compares
	 * equal to NULL unless the offset happens to be 0.
	 */
	if (oldnode == NULL)
		return (NULL);

	node = AVL_DATA2NODE(oldnode, off);

	/*
	 * Visit the previous valued node. There are two possibilities:
	 *
	 * If this node has a left child, go down one left, then all
	 * the way right.
	 */
	if (node->avl_child[left] != NULL) {
		for (node = node->avl_child[left];
		    node->avl_child[right] != NULL;
		    node = node->avl_child[right])
			;
	/*
	 * Otherwise, return thru left children as far as we can.
	 */
	} else {
		for (;;) {
			/* remember which side we are leaving before moving up */
			was_child = AVL_XCHILD(node);
			node = AVL_XPARENT(node);
			if (node == NULL)
				return (NULL);
			if (was_child == right)
				break;
		}
	}

	return (AVL_NODE2DATA(node, off));
}
/*
 * Return the lowest valued node in a tree, or NULL if the tree is empty.
 * The lowest node is found by following left (index 0) children down
 * from the root until there are none left.
 */
void *
avl_first(avl_tree_t *tree)
{
	size_t off = tree->avl_offset;
	avl_node_t *cur = tree->avl_root;

	/* an empty tree has no first node */
	if (cur == NULL)
		return (NULL);

	while (cur->avl_child[0] != NULL)
		cur = cur->avl_child[0];

	return (AVL_NODE2DATA(cur, off));
}
/*
 * Return the highest valued node in a tree, or NULL if the tree is empty.
 * The highest node is found by following right (index 1) children down
 * from the root until there are none left.
 */
void *
avl_last(avl_tree_t *tree)
{
	size_t off = tree->avl_offset;
	avl_node_t *cur = tree->avl_root;

	/* an empty tree has no last node */
	if (cur == NULL)
		return (NULL);

	while (cur->avl_child[1] != NULL)
		cur = cur->avl_child[1];

	return (AVL_NODE2DATA(cur, off));
}
/*
 * Access the node immediately before or after an insertion point.
 *
 * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child,
 * i.e. it names the empty child slot where a new value would be attached.
 *
 *	tree      - the tree that "where" was computed for
 *	where     - insertion point
 *	direction - child index of the side to look on (0 or 1)
 *
 * Return value:
 *	NULL: no node in the given direction
 *	"void *" of the found tree node
 */
void *
avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
{
	avl_node_t *anchor = AVL_INDEX2NODE(where);
	int slot = AVL_INDEX2CHILD(where);
	size_t off = tree->avl_offset;
	void *anchor_data;

	/* an index without a node can only come from an empty tree */
	if (anchor == NULL) {
		ASSERT(tree->avl_root == NULL);
		return (NULL);
	}

	anchor_data = AVL_NODE2DATA(anchor, off);

	/*
	 * When the empty slot is on the requested side of the anchor, the
	 * answer lies beyond the anchor and we have to walk to it.
	 */
	if (slot == direction)
		return (avl_walk(tree, anchor_data, direction));

	/* otherwise the anchor itself is the answer */
	return (anchor_data);
}
/*
 * Search for the node which contains "value" using a plain binary tree
 * descent driven by the tree's comparison function.
 *
 *	tree  - the tree to search
 *	value - data structure holding the value being looked for
 *	where - if not NULL, receives the insertion point on a miss
 *
 * return value:
 *	NULL: the value is not in the AVL tree
 *		*where (if not NULL) is set to indicate the insertion point
 *	"void *" of the found tree node
 *		*where is left alone, except that DEBUG builds clear it to 0
 */
void *
avl_find(avl_tree_t *tree, void *value, avl_index_t *where)
{
	avl_node_t *cur = tree->avl_root;
	avl_node_t *last = NULL;
	int side = 0;
	int cmp;
	size_t off = tree->avl_offset;

	while (cur != NULL) {
		cmp = tree->avl_compar(value, AVL_NODE2DATA(cur, off));
		ASSERT(-1 <= cmp && cmp <= 1);

		if (cmp == 0) {
#ifdef DEBUG
			if (where != NULL)
				*where = 0;
#endif
			return (AVL_NODE2DATA(cur, off));
		}

		/* remember where we fell off in case this was the last node */
		last = cur;
		side = avl_balance2child[1 + cmp];
		cur = cur->avl_child[side];
	}

	if (where != NULL)
		*where = AVL_MKINDEX(last, side);

	return (NULL);
}
/*
 * Perform a rotation to restore balance at the subtree rooted at "node".
 *
 * This routine is used by both insertion and deletion. The return value
 * indicates:
 *	 0 : subtree did not change height
 *	!0 : subtree was reduced in height
 *
 * The code is written as if handling left rotations, right rotations are
 * symmetric and handled by swapping values of variables right/left[_heavy]
 *
 * On input balance is the "new" balance at "node". This value is either
 * -2 or +2.  It has not been stored in "node"; the balances of all nodes
 * involved are rewritten here.
 *
 *	tree    - tree containing "node"; its root pointer is updated when
 *		  "node" has no parent
 *	node    - root of the unbalanced subtree
 *	balance - the out of range balance computed for "node"
 */
static int
avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
{
	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
	int right = 1 - left;
	/*
	 * -1 for balance -2, +1 for balance +2.
	 * NOTE(review): for -2 this relies on ">>" of a negative int being
	 * an arithmetic shift, which C leaves implementation-defined.
	 */
	int left_heavy = balance >> 1;
	int right_heavy = -left_heavy;
	avl_node_t *parent = AVL_XPARENT(node);
	avl_node_t *child = node->avl_child[left];	/* the taller side */
	avl_node_t *cright;
	avl_node_t *gchild;
	avl_node_t *gright;
	avl_node_t *gleft;
	int which_child = AVL_XCHILD(node);
	int child_bal = AVL_XBALANCE(child);

	/* BEGIN CSTYLED */
	/*
	 * case 1 : node is overly left heavy, the left child is balanced or
	 * also left heavy. This requires the following rotation.
	 *
	 *                   (node bal:-2)
	 *                    /           \
	 *                   /             \
	 *              (child bal:0 or -1)
	 *              /    \
	 *             /      \
	 *                     cright
	 *
	 * becomes:
	 *
	 *              (child bal:1 or 0)
	 *              /        \
	 *             /          \
	 *                        (node bal:-1 or 0)
	 *                         /     \
	 *                        /       \
	 *                     cright
	 *
	 * we detect this situation by noting that child's balance is not
	 * right_heavy.
	 */
	/* END CSTYLED */
	if (child_bal != right_heavy) {

		/*
		 * compute new balance of nodes
		 *
		 * If child used to be left heavy (now balanced) we reduced
		 * the height of this sub-tree -- used in "return...;" below
		 */
		child_bal += right_heavy; /* adjust towards right */

		/*
		 * move "cright" to be node's left child
		 */
		cright = child->avl_child[right];
		node->avl_child[left] = cright;
		if (cright != NULL) {
			AVL_SETPARENT(cright, node);
			AVL_SETCHILD(cright, left);
		}

		/*
		 * move node to be child's right child
		 *
		 * node ends up with the opposite of child's new balance
		 * (see the "becomes" picture above)
		 */
		child->avl_child[right] = node;
		AVL_SETBALANCE(node, -child_bal);
		AVL_SETCHILD(node, right);
		AVL_SETPARENT(node, child);

		/*
		 * update the pointer into this subtree
		 */
		AVL_SETBALANCE(child, child_bal);
		AVL_SETCHILD(child, which_child);
		AVL_SETPARENT(child, parent);
		if (parent != NULL)
			parent->avl_child[which_child] = child;
		else
			tree->avl_root = child;

		return (child_bal == 0);
	}

	/* BEGIN CSTYLED */
	/*
	 * case 2 : When node is left heavy, but child is right heavy we use
	 * a different rotation.
	 *
	 *                   (node b:-2)
	 *                    /   \
	 *                   /     \
	 *                  /       \
	 *             (child b:+1)
	 *              /     \
	 *             /       \
	 *                   (gchild b: != 0)
	 *                     /  \
	 *                    /    \
	 *                 gleft   gright
	 *
	 * becomes:
	 *
	 *              (gchild b:0)
	 *              /       \
	 *             /         \
	 *            /           \
	 *        (child b:?)   (node b:?)
	 *         /  \          /   \
	 *        /    \        /     \
	 *            gleft   gright
	 *
	 * computing the new balances is more complicated. As an example:
	 *	 if gchild was right_heavy, then child is now left heavy
	 *		else it is balanced
	 */
	/* END CSTYLED */
	gchild = child->avl_child[right];
	gleft = gchild->avl_child[left];
	gright = gchild->avl_child[right];

	/*
	 * move gright to left child of node and
	 *
	 * move gleft to right child of node
	 */
	node->avl_child[left] = gright;
	if (gright != NULL) {
		AVL_SETPARENT(gright, node);
		AVL_SETCHILD(gright, left);
	}

	child->avl_child[right] = gleft;
	if (gleft != NULL) {
		AVL_SETPARENT(gleft, child);
		AVL_SETCHILD(gleft, right);
	}

	/*
	 * move child to left child of gchild and
	 *
	 * move node to right child of gchild and
	 *
	 * fixup parent of all this to point to gchild
	 *
	 * "balance" is reused here to hold gchild's balance from before
	 * the rotation; the new balances of child and node derive from it.
	 */
	balance = AVL_XBALANCE(gchild);
	gchild->avl_child[left] = child;
	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
	AVL_SETPARENT(child, gchild);
	AVL_SETCHILD(child, left);

	gchild->avl_child[right] = node;
	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
	AVL_SETPARENT(node, gchild);
	AVL_SETCHILD(node, right);

	AVL_SETBALANCE(gchild, 0);
	AVL_SETPARENT(gchild, parent);
	AVL_SETCHILD(gchild, which_child);
	if (parent != NULL)
		parent->avl_child[which_child] = gchild;
	else
		tree->avl_root = gchild;

	return (1);	/* the new tree is always shorter */
}
/*
 * Insert a new node into an AVL tree at the specified (from avl_find()) place.
 *
 * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
 * searches out to the leaf positions. The avl_index_t indicates the node
 * which will be the parent of the new node, and which of its child slots
 * the new node goes into.
 *
 * After the node is inserted, a single rotation further up the tree may
 * be necessary to maintain an acceptable AVL balance.
 *
 *	tree     - the tree to insert into
 *	new_data - data structure embedding the avl_node_t to link in
 *	where    - insertion point
 */
void
avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
{
	avl_node_t *cur;
	avl_node_t *parent = AVL_INDEX2NODE(where);
	int before;
	int after;
	int which_child = AVL_INDEX2CHILD(where);
	size_t off = tree->avl_offset;

	ASSERT(tree);
#ifdef _LP64
	ASSERT(((uintptr_t)new_data & 0x7) == 0);
#endif

	/*
	 * Step one: hook the new leaf into the slot named by "where".
	 */
	cur = AVL_DATA2NODE(new_data, off);
	++tree->avl_numnodes;

	cur->avl_child[0] = NULL;
	cur->avl_child[1] = NULL;

	AVL_SETCHILD(cur, which_child);
	AVL_SETBALANCE(cur, 0);
	AVL_SETPARENT(cur, parent);

	if (parent == NULL) {
		ASSERT(tree->avl_root == NULL);
		tree->avl_root = cur;
	} else {
		ASSERT(parent->avl_child[which_child] == NULL);
		parent->avl_child[which_child] = cur;
	}

	/*
	 * Step two: climb towards the root, updating the balance of each
	 * ancestor for the side that just grew.  The climb ends when we run
	 * out of ancestors, when an ancestor becomes perfectly balanced (its
	 * height did not change), or when an ancestor that was already
	 * leaning tips over and has to be rotated.
	 */
	for (cur = parent; cur != NULL; cur = parent) {
		before = AVL_XBALANCE(cur);
		after = before + avl_child2balance[which_child];

		/* the shorter side caught up: nothing changes further up */
		if (after == 0) {
			AVL_SETBALANCE(cur, 0);
			return;
		}

		/* neither old nor new balance is zero: went to -2 or +2 */
		if (before != 0) {
			(void) avl_rotation(tree, cur, after);
			return;
		}

		/* was balanced, now leans: this subtree got taller */
		AVL_SETBALANCE(cur, after);
		parent = AVL_XPARENT(cur);
		which_child = AVL_XCHILD(cur);
	}
}
/*
 * Insert "new_data" in "tree" in the given "direction" either after or
 * before (AVL_AFTER, AVL_BEFORE) the data "here".
 *
 * Insertions can only be done at empty leaf points in the tree, therefore
 * if the given child of the node is already present we move to either
 * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
 * every other node in the tree is a leaf, this always works.
 *
 * To help developers using this interface, DEBUG kernels assert at every
 * node visited that the new node is correctly ordered relative to it.
 *
 *	tree      - the tree to insert into
 *	new_data  - data structure to insert
 *	here      - data structure already in the tree
 *	direction - AVL_BEFORE or AVL_AFTER
 */
void
avl_insert_here(avl_tree_t *tree, void *new_data, void *here, int direction)
{
	avl_node_t *spot;
	int slot = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
#ifdef DEBUG
	int cmp;
#endif

	ASSERT(tree != NULL);
	ASSERT(new_data != NULL);
	ASSERT(here != NULL);
	ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);

	spot = AVL_DATA2NODE(here, tree->avl_offset);

#ifdef DEBUG
	cmp = tree->avl_compar(new_data, here);
	ASSERT(-1 <= cmp && cmp <= 1);
	ASSERT(cmp != 0);
	ASSERT(cmp > 0 ? slot == 1 : slot == 0);
#endif

	/*
	 * The wanted slot is taken: step into that child, flip the
	 * direction and run out to the end of the chain, which is the
	 * neighboring node with a free slot facing "here".
	 */
	if (spot->avl_child[slot] != NULL) {
		spot = spot->avl_child[slot];
		slot = 1 - slot;

		for (;;) {
#ifdef DEBUG
			cmp = tree->avl_compar(new_data,
			    AVL_NODE2DATA(spot, tree->avl_offset));
			ASSERT(-1 <= cmp && cmp <= 1);
			ASSERT(cmp != 0);
			ASSERT(cmp > 0 ? slot == 1 : slot == 0);
#endif
			if (spot->avl_child[slot] == NULL)
				break;
			spot = spot->avl_child[slot];
		}
	}

	ASSERT(spot->avl_child[slot] == NULL);

	avl_insert(tree, new_data, AVL_MKINDEX(spot, slot));
}
/*
 * Add a new node to an AVL tree.
 *
 * The node must not compare equal to any node already in the tree.
 *
 *	tree     - the tree to add to
 *	new_node - data structure to insert
 */
void
avl_add(avl_tree_t *tree, void *new_node)
{
	avl_index_t where;

	/*
	 * This is unfortunate.  We want to call panic() here, even for
	 * non-DEBUG kernels.  In userland, however, we can't depend on anything
	 * in libc or else the rtld build process gets confused.  So, all we can
	 * do in userland is resort to a normal ASSERT().
	 */
	if (avl_find(tree, new_node, &where) != NULL) {
#ifdef _KERNEL
		panic("avl_find() succeeded inside avl_add()");
#else
		ASSERT(0);
		/*
		 * In builds where ASSERT() does nothing we must not go on:
		 * avl_find() only stores an insertion point in "where" on
		 * a miss (DEBUG builds zero it on a hit), so inserting now
		 * would use an uninitialized or meaningless index.
		 */
		return;
#endif
	}

	avl_insert(tree, new_node, where);
}
/*
 * Delete a node from the AVL tree.  Deletion is similar to insertion, but
 * with 2 complications.
 *
 * First, we may be deleting an interior node. Consider the following subtree:
 *
 *     d           c            c
 *    / \         / \          / \
 *   b   e       b   e        b   e
 *  / \         / \          /
 * a   c       a   d        a
 *
 * When we are deleting node (d), we find and bring up an adjacent valued leaf
 * node, say (c), to take the interior node's place. In the code this is
 * handled by temporarily swapping (d) and (c) in the tree and then using
 * common code to delete (d) from the leaf position.
 *
 * Secondly, an interior deletion from a deep tree may require more than one
 * rotation to fix the balance. This is handled by moving up the tree through
 * parents and applying rotations as needed. The return value from
 * avl_rotation() is used to detect when a subtree did not change overall
 * height due to a rotation.
 *
 *	tree - the tree to remove from
 *	data - data structure to unlink; it is not looked up first, its
 *	       embedded avl_node_t is used directly
 */
void
avl_remove(avl_tree_t *tree, void *data)
{
	avl_node_t *delete;
	avl_node_t *parent;
	avl_node_t *node;
	avl_node_t tmp;		/* stack placeholder used during the swap */
	int old_balance;
	int new_balance;
	int left;
	int right;
	int which_child;
	size_t off = tree->avl_offset;

	ASSERT(tree);

	delete = AVL_DATA2NODE(data, off);

	/*
	 * Deletion is easiest with a node that has at most 1 child.
	 * We swap a node with 2 children with a sequentially valued
	 * neighbor node. That node will have at most 1 child. Note this
	 * has no effect on the ordering of the remaining nodes.
	 *
	 * As an optimization, we choose the greater neighbor if the tree
	 * is right heavy, otherwise the left neighbor. This reduces the
	 * number of rotations needed.
	 */
	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {

		/*
		 * choose node to swap from whichever side is taller
		 */
		old_balance = AVL_XBALANCE(delete);
		left = avl_balance2child[old_balance + 1];
		right = 1 - left;

		/*
		 * get to the previous value'd node
		 * (down 1 left, as far as possible right)
		 */
		for (node = delete->avl_child[left];
		    node->avl_child[right] != NULL;
		    node = node->avl_child[right])
			;

		/*
		 * create a temp placeholder for 'node'
		 * move 'node' to delete's spot in the tree
		 */
		tmp = *node;

		*node = *delete;
		/*
		 * If 'node' was delete's direct child, the copy above left
		 * it pointing at itself; point it at the placeholder instead.
		 */
		if (node->avl_child[left] == node)
			node->avl_child[left] = &tmp;

		parent = AVL_XPARENT(node);
		if (parent != NULL)
			parent->avl_child[AVL_XCHILD(node)] = node;
		else
			tree->avl_root = node;
		/* both children exist, since delete had two children */
		AVL_SETPARENT(node->avl_child[left], node);
		AVL_SETPARENT(node->avl_child[right], node);

		/*
		 * Put tmp where node used to be (just temporary).
		 * It always has a parent and at most 1 child.
		 */
		delete = &tmp;
		parent = AVL_XPARENT(delete);
		parent->avl_child[AVL_XCHILD(delete)] = delete;
		/* index of the placeholder's only possible child */
		which_child = (delete->avl_child[1] != 0);
		if (delete->avl_child[which_child] != NULL)
			AVL_SETPARENT(delete->avl_child[which_child], delete);
	}


	/*
	 * Here we know "delete" is at least partially a leaf node. It can
	 * be easily removed from the tree.
	 */
	ASSERT(tree->avl_numnodes > 0);
	--tree->avl_numnodes;
	parent = AVL_XPARENT(delete);
	which_child = AVL_XCHILD(delete);
	/* "node" becomes delete's only child, or NULL if it has none */
	if (delete->avl_child[0] != NULL)
		node = delete->avl_child[0];
	else
		node = delete->avl_child[1];

	/*
	 * Connect parent directly to node (leaving out delete).
	 */
	if (node != NULL) {
		AVL_SETPARENT(node, parent);
		AVL_SETCHILD(node, which_child);
	}
	/* delete was the root: nothing above it needs rebalancing */
	if (parent == NULL) {
		tree->avl_root = node;
		return;
	}
	parent->avl_child[which_child] = node;


	/*
	 * Since the subtree is now shorter, begin adjusting parent balances
	 * and performing any needed rotations.
	 */
	do {

		/*
		 * Move up the tree and adjust the balance
		 *
		 * Capture the parent and which_child values for the next
		 * iteration before any rotations occur.
		 */
		node = parent;
		old_balance = AVL_XBALANCE(node);
		/* subtract: the side given by which_child got shorter */
		new_balance = old_balance - avl_child2balance[which_child];
		parent = AVL_XPARENT(node);
		which_child = AVL_XCHILD(node);

		/*
		 * If a node was in perfect balance but isn't anymore then
		 * we can stop, since the height didn't change above this point
		 * due to a deletion.
		 */
		if (old_balance == 0) {
			AVL_SETBALANCE(node, new_balance);
			break;
		}

		/*
		 * If the new balance is zero, we don't need to rotate
		 * else
		 * need a rotation to fix the balance.
		 * If the rotation doesn't change the height
		 * of the sub-tree we have finished adjusting.
		 */
		if (new_balance == 0)
			AVL_SETBALANCE(node, new_balance);
		else if (!avl_rotation(tree, node, new_balance))
			break;
	} while (parent != NULL);
}
/*
 * Initialize a new, empty AVL tree.
 *
 *	tree   - the tree header to set up
 *	compar - comparison function stored in the tree for ordering nodes
 *	size   - size of the data structure that embeds the avl_node_t
 *	offset - byte offset of the avl_node_t inside that data structure
 *
 * The arguments are only sanity checked by way of ASSERT().
 */
void
avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
    size_t size, size_t offset)
{
	ASSERT(tree);
	ASSERT(compar);
	ASSERT(size > 0);
	ASSERT(size >= offset + sizeof (avl_node_t));
#ifdef _LP64
	ASSERT((offset & 0x7) == 0);
#endif

	/* the tree starts out without any nodes */
	tree->avl_numnodes = 0;
	tree->avl_root = NULL;

	/* remember how to locate and order the embedded nodes */
	tree->avl_offset = offset;
	tree->avl_size = size;
	tree->avl_compar = compar;
}
/*
 * Delete a tree.
 *
 * Nothing is released or modified here: the function only asserts that
 * the tree has already been emptied (no nodes counted, no root).
 *
 *	tree - the tree being destroyed
 */
/* ARGSUSED */
void
avl_destroy(avl_tree_t *tree)
{
	ASSERT(tree);
	ASSERT(tree->avl_numnodes == 0);
	ASSERT(tree->avl_root == NULL);
}
/*
 * Report how many nodes are currently linked into an AVL tree, as
 * tracked by the tree's node counter.
 */
ulong_t
avl_numnodes(avl_tree_t *tree)
{
	ulong_t count;

	ASSERT(tree);

	count = tree->avl_numnodes;
	return (count);
}
/* low bit of the cookie: index of the child that was handed out last */
#define CHILDBIT (1L)

/*
 * Post-order tree walk used to visit all tree nodes and destroy the tree
 * in post order. This is used for destroying a tree w/o paying any cost
 * for rebalancing it.
 *
 * example:
 *
 *	void *cookie = NULL;
 *	my_data_t *node;
 *
 *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
 *		free(node);
 *	avl_destroy(tree);
 *
 * The cookie is really an avl_node_t to the current node's parent and
 * an indication of which child you looked at last.
 *
 * On input, a cookie value of CHILDBIT indicates the tree is done.
 *
 *	tree   - the tree being torn down
 *	cookie - walk state; *cookie must be NULL on the first call and is
 *		 updated on every call
 *
 * Returns the data structure of the next node to dispose of, or NULL
 * once every node has been returned.
 */
void *
avl_destroy_nodes(avl_tree_t *tree, void **cookie)
{
	avl_node_t *node;
	avl_node_t *parent;
	int child;
	void *first;
	size_t off = tree->avl_offset;

	/*
	 * Initial calls go to the first node or its right descendant.
	 */
	if (*cookie == NULL) {
		first = avl_first(tree);

		/*
		 * deal with an empty tree
		 */
		if (first == NULL) {
			*cookie = (void *)CHILDBIT;
			return (NULL);
		}

		node = AVL_DATA2NODE(first, off);
		parent = AVL_XPARENT(node);
		goto check_right_side;
	}

	/*
	 * If there is no parent to return to we are done.
	 *
	 * A cookie of just CHILDBIT masks to a NULL parent.  If a root is
	 * still recorded it was the node handed out by the previous call,
	 * so forget it and zero the count.
	 */
	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
	if (parent == NULL) {
		if (tree->avl_root != NULL) {
			ASSERT(tree->avl_numnodes == 1);
			tree->avl_root = NULL;
			tree->avl_numnodes = 0;
		}
		return (NULL);
	}

	/*
	 * Remove the child pointer we just visited from the parent and tree.
	 */
	child = (uintptr_t)(*cookie) & CHILDBIT;
	parent->avl_child[child] = NULL;
	ASSERT(tree->avl_numnodes > 1);
	--tree->avl_numnodes;

	/*
	 * If we just did a right child or there isn't one, go up to parent.
	 */
	if (child == 1 || parent->avl_child[1] == NULL) {
		node = parent;
		parent = AVL_XPARENT(parent);
		goto done;
	}

	/*
	 * Do parent's right child, then leftmost descendent.
	 */
	node = parent->avl_child[1];
	while (node->avl_child[0] != NULL) {
		parent = node;
		node = node->avl_child[0];
	}

	/*
	 * If here, we moved to a left child. It may have one
	 * child on the right (when balance == +1).
	 */
check_right_side:
	if (node->avl_child[1] != NULL) {
		ASSERT(AVL_XBALANCE(node) == 1);
		parent = node;
		node = node->avl_child[1];
		ASSERT(node->avl_child[0] == NULL &&
		    node->avl_child[1] == NULL);
	} else {
		ASSERT(AVL_XBALANCE(node) <= 0);
	}

	/*
	 * Record in the cookie where "node" hangs so that the next call can
	 * unlink it; a parentless node is the root and ends the walk.
	 */
done:
	if (parent == NULL) {
		*cookie = (void *)CHILDBIT;
		ASSERT(node == tree->avl_root);
	} else {
		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
	}

	return (AVL_NODE2DATA(node, off));
}
+1
View File
@@ -0,0 +1 @@
subdir-m += sys
+1
View File
@@ -0,0 +1 @@
DISTFILES = avl.h avl_impl.h
+298
View File
@@ -0,0 +1,298 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _AVL_H
#define _AVL_H
/*
* This is a private header file. Applications should not directly include
* this file.
*/
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/avl_impl.h>
/*
* This is a generic implementation of AVL trees for use in the Solaris kernel.
* The interfaces provide an efficient way of implementing an ordered set of
* data structures.
*
* AVL trees provide an alternative to using an ordered linked list. Using AVL
* trees will usually be faster, however they require more storage. An ordered
* linked list in general requires 2 pointers in each data structure. The
* AVL tree implementation uses 3 pointers. The following chart gives the
* approximate performance of operations with the different approaches:
*
* Operation Link List AVL tree
* --------- -------- --------
* lookup O(n) O(log(n))
*
* insert 1 node constant constant
*
* delete 1 node constant between constant and O(log(n))
*
* delete all nodes O(n) O(n)
*
* visit the next
* or prev node constant between constant and O(log(n))
*
*
* The data structure nodes are anchored at an "avl_tree_t" (the equivalent
* of a list header) and the individual nodes will have a field of
* type "avl_node_t" (corresponding to list pointers).
*
* The type "avl_index_t" is used to indicate a position in the list for
* certain calls.
*
* The usage scenario is generally:
*
* 1. Create the list/tree with: avl_create()
*
* followed by any mixture of:
*
* 2a. Insert nodes with: avl_add(), or avl_find() and avl_insert()
*
* 2b. Visit elements with:
* avl_first() - returns the lowest valued node
* avl_last() - returns the highest valued node
* AVL_NEXT() - given a node go to next higher one
* AVL_PREV() - given a node go to previous lower one
*
* 2c. Find the node with the closest value either less than or greater
* than a given value with avl_nearest().
*
* 2d. Remove individual nodes from the list/tree with avl_remove().
*
* and finally when the list is being destroyed
*
* 3. Use avl_destroy_nodes() to quickly process/free up any remaining nodes.
* Note that once you use avl_destroy_nodes(), you can no longer
* use any routine except avl_destroy_nodes() and avl_destroy().
*
* 4. Use avl_destroy() to destroy the AVL tree itself.
*
* Any locking for multiple thread access is up to the user to provide, just
* as is needed for any linked list implementation.
*/
/*
* Type used for the root of the AVL tree.
*/
typedef struct avl_tree avl_tree_t;
/*
* The data nodes in the AVL tree must have a field of this type.
*/
typedef struct avl_node avl_node_t;
/*
* An opaque type used to locate a position in the tree where a node
* would be inserted.
*/
typedef uintptr_t avl_index_t;
/*
* Direction constants used for avl_nearest(), avl_insert_here() and avl_walk().
*/
#define AVL_BEFORE (0)
#define AVL_AFTER (1)
/*
* Prototypes
*
* Where not otherwise mentioned, "void *" arguments are a pointer to the
* user data structure which must contain a field of type avl_node_t.
*
* Also assume the user data structures looks like:
* struct my_type {
* ...
* avl_node_t my_link;
* ...
* };
*/
/*
* Initialize an AVL tree. Arguments are:
*
* tree - the tree to be initialized
* compar - function to compare two nodes, it must return exactly: -1, 0, or +1
* -1 for <, 0 for ==, and +1 for >
* size - the value of sizeof(struct my_type)
* offset - the value of OFFSETOF(struct my_type, my_link)
*/
extern void avl_create(avl_tree_t *tree,
int (*compar) (const void *, const void *), size_t size, size_t offset);
/*
* Find a node with a matching value in the tree. Returns the matching node
* found. If not found, it returns NULL and then if "where" is not NULL it sets
* "where" for use with avl_insert() or avl_nearest().
*
* node - node that has the value being looked for
* where - position for use with avl_nearest() or avl_insert(), may be NULL
*/
extern void *avl_find(avl_tree_t *tree, void *node, avl_index_t *where);
/*
* Insert a node into the tree.
*
* node - the node to insert
* where - position as returned from avl_find()
*/
extern void avl_insert(avl_tree_t *tree, void *node, avl_index_t where);
/*
* Insert "new_data" in "tree" in the given "direction" either after
* or before the data "here".
*
* This might be useful for avl clients caching recently accessed
* data to avoid doing avl_find() again for insertion.
*
* new_data - new data to insert
* here - existing node in "tree"
* direction - either AVL_AFTER or AVL_BEFORE the data "here".
*/
extern void avl_insert_here(avl_tree_t *tree, void *new_data, void *here,
int direction);
/*
* Return the first or last valued node in the tree. Will return NULL
* if the tree is empty.
*
*/
extern void *avl_first(avl_tree_t *tree);
extern void *avl_last(avl_tree_t *tree);
/*
* Return the next or previous valued node in the tree.
* AVL_NEXT() will return NULL if at the last node.
* AVL_PREV() will return NULL if at the first node.
*
* node - the node from which the next or previous node is found
*/
#define AVL_NEXT(tree, node) avl_walk(tree, node, AVL_AFTER)
#define AVL_PREV(tree, node) avl_walk(tree, node, AVL_BEFORE)
/*
* Find the node with the nearest value either greater or less than
* the value from a previous avl_find(). Returns the node or NULL if
* there isn't a matching one.
*
* where - position as returned from avl_find()
* direction - either AVL_BEFORE or AVL_AFTER
*
* EXAMPLE get the greatest node that is less than a given value:
*
* avl_tree_t *tree;
* struct my_data look_for_value = {....};
* struct my_data *node;
* struct my_data *less;
* avl_index_t where;
*
* node = avl_find(tree, &look_for_value, &where);
* if (node != NULL)
* less = AVL_PREV(tree, node);
* else
* less = avl_nearest(tree, where, AVL_BEFORE);
*/
extern void *avl_nearest(avl_tree_t *tree, avl_index_t where, int direction);
/*
* Add a single node to the tree.
* The node must not be in the tree, and it must not
* compare equal to any other node already in the tree.
*
* node - the node to add
*/
extern void avl_add(avl_tree_t *tree, void *node);
/*
* Remove a single node from the tree. The node must be in the tree.
*
* node - the node to remove
*/
extern void avl_remove(avl_tree_t *tree, void *node);
/*
* Return the number of nodes in the tree
*/
extern ulong_t avl_numnodes(avl_tree_t *tree);
/*
* Used to destroy any remaining nodes in a tree. The cookie argument should
* be initialized to NULL before the first call. Returns a node that has been
* removed from the tree and may be free()'d. Returns NULL when the tree is
* empty.
*
* Once you call avl_destroy_nodes(), you can only continue calling it and
* finally avl_destroy(). No other AVL routines will be valid.
*
* cookie - a "void *" used to save state between calls to avl_destroy_nodes()
*
* EXAMPLE:
* avl_tree_t *tree;
* struct my_data *node;
* void *cookie;
*
* cookie = NULL;
* while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
* free(node);
* avl_destroy(tree);
*/
extern void *avl_destroy_nodes(avl_tree_t *tree, void **cookie);
/*
* Final destroy of an AVL tree. Arguments are:
*
* tree - the empty tree to destroy
*/
extern void avl_destroy(avl_tree_t *tree);
#ifdef __cplusplus
}
#endif
#endif /* _AVL_H */
+164
View File
@@ -0,0 +1,164 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _AVL_IMPL_H
#define _AVL_IMPL_H
/*
* This is a private header file. Applications should not directly include
* this file.
*/
#include <sys/types.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* generic AVL tree implementation for kernel use
*
* There are 5 pieces of information stored for each node in an AVL tree
*
* pointer to less than child
* pointer to greater than child
* a pointer to the parent of this node
* an indication [0/1] of which child I am of my parent
* a "balance" (-1, 0, +1) indicating which child tree is taller
*
* Since they only need 3 bits, the last two fields are packed into the
* bottom bits of the parent pointer on 64 bit machines to save on space.
*/
#ifndef _LP64
/*
 * Node layout used when _LP64 is not defined: the parent pointer, the child
 * index and the balance are each kept in their own field.
 */
struct avl_node {
struct avl_node *avl_child[2]; /* left/right children */
struct avl_node *avl_parent; /* this node's parent */
unsigned short avl_child_index; /* my index in parent's avl_child[] */
short avl_balance; /* balance value: -1, 0, +1 */
};
/*
 * Field accessors.  The X* macros read a field and the SET* macros store
 * one; the _LP64 variant of this header defines macros with the same names
 * on top of a packed representation.
 */
#define AVL_XPARENT(n) ((n)->avl_parent)
#define AVL_SETPARENT(n, p) ((n)->avl_parent = (p))
#define AVL_XCHILD(n) ((n)->avl_child_index)
#define AVL_SETCHILD(n, c) ((n)->avl_child_index = (unsigned short)(c))
#define AVL_XBALANCE(n) ((n)->avl_balance)
#define AVL_SETBALANCE(n, b) ((n)->avl_balance = (short)(b))
#else /* _LP64 */
/*
* for 64 bit machines, avl_pcb contains parent pointer, balance and child_index
* values packed in the following manner:
*
* |63 3| 2 |1 0 |
* |-------------------------------------|-----------------|-------------|
* | avl_parent hi order bits | avl_child_index | avl_balance |
* | | | + 1 |
* |-------------------------------------|-----------------|-------------|
*
*/
/*
 * Node layout used when _LP64 is defined: parent pointer, child index and
 * balance all share the single word avl_pcb.
 */
struct avl_node {
struct avl_node *avl_child[2]; /* left/right children nodes */
uintptr_t avl_pcb; /* parent, child_index, balance */
};
/*
* macros to extract/set fields in avl_pcb
*
* pointer to the parent of the current node is the high order bits
*
* AVL_XPARENT() masks off the low three bits and AVL_SETPARENT() ORs the
* new pointer into the word unmasked, so the parent address passed in must
* already have its low three bits clear.
*/
#define AVL_XPARENT(n) ((struct avl_node *)((n)->avl_pcb & ~7))
#define AVL_SETPARENT(n, p) \
((n)->avl_pcb = (((n)->avl_pcb & 7) | (uintptr_t)(p)))
/*
* index of this node in its parent's avl_child[]: bit #2
*
* AVL_SETCHILD() shifts "c" without masking it, so "c" must be 0 or 1.
*/
#define AVL_XCHILD(n) (((n)->avl_pcb >> 2) & 1)
#define AVL_SETCHILD(n, c) \
((n)->avl_pcb = (uintptr_t)(((n)->avl_pcb & ~4) | ((c) << 2)))
/*
* balance indication for a node, lowest 2 bits. A valid balance is
* -1, 0, or +1, and is encoded by adding 1 to the value to get the
* unsigned values of 0, 1, 2.
*
* AVL_SETBALANCE() does not range-check "b"; a value outside -1..+1 would
* spill into the neighbouring fields.
*/
#define AVL_XBALANCE(n) ((int)(((n)->avl_pcb & 3) - 1))
#define AVL_SETBALANCE(n, b) \
((n)->avl_pcb = (uintptr_t)((((n)->avl_pcb & ~3) | ((b) + 1))))
#endif /* _LP64 */
/*
* switch between a node and data pointer for a given tree
* the value of "o" is tree->avl_offset
*
* AVL_NODE2DATA() subtracts the offset from the node address to get the
* enclosing user structure; AVL_DATA2NODE() adds it to go the other way.
*/
#define AVL_NODE2DATA(n, o) ((void *)((uintptr_t)(n) - (o)))
#define AVL_DATA2NODE(d, o) ((struct avl_node *)((uintptr_t)(d) + (o)))
/*
* macros used to create/access an avl_index_t
*
* An index is a node pointer with a child selector (0 or 1) stored in
* bit 0: AVL_MKINDEX() ORs the two together, AVL_INDEX2NODE() clears
* bit 0 to recover the pointer and AVL_INDEX2CHILD() extracts bit 0.
*/
#define AVL_INDEX2NODE(x) ((avl_node_t *)((x) & ~1))
#define AVL_INDEX2CHILD(x) ((x) & 1)
#define AVL_MKINDEX(n, c) ((avl_index_t)(n) | (c))
/*
* The tree structure. The fields avl_root, avl_compar, and avl_offset come
* first since they are needed for avl_find(). We want them to fit into
* a single 64 byte cache line to make avl_find() as fast as possible.
*/
struct avl_tree {
struct avl_node *avl_root; /* root node in tree */
/* ordering function for two user data pointers */
int (*avl_compar)(const void *, const void *);
/*
 * NOTE(review): the comment below names "avl_link_t", while the usage
 * example in avl.h embeds an avl_node_t -- confirm the intended type name.
 */
size_t avl_offset; /* offsetof(type, avl_link_t field) */
ulong_t avl_numnodes; /* number of nodes in the tree */
size_t avl_size; /* sizeof user type struct */
};
/*
* This will only be used via AVL_NEXT() or AVL_PREV()
*/
extern void *avl_walk(struct avl_tree *, void *, int);
#ifdef __cplusplus
}
#endif
#endif /* _AVL_IMPL_H */
+28
View File
@@ -0,0 +1,28 @@
# NOTE: dctl_client.c, dctl_common.c, dctl_server.c, dctl_thrpool.c are unused
# by the kernel port. Potentially they should just be removed if we don't care
# about user space Lustre integration from this source base.
# NOTE: For clarity this directory should simply be renamed libzpl and
# the full kernel implementation should be minimally stubbed out.
subdir-m += include
DISTFILES = dctl_client.c dctl_common.c dctl_server.c dctl_thrpool.c
DISTFILES += dmu_send.c rrwlock.c zfs_acl.c zfs_ctldir.c
DISTFILES += zfs_dir.c zfs_fuid.c zfs_ioctl.c zfs_log.c zfs_replay.c
DISTFILES += zfs_rlock.c zfs_vfsops.c zfs_vnops.c zvol.c
MODULE := zctl
EXTRA_CFLAGS = @KERNELCPPFLAGS@
EXTRA_CFLAGS += -I@LIBDIR@/libzcommon/include
EXTRA_CFLAGS += -I@LIBDIR@/libdmu-ctl/include
EXTRA_CFLAGS += -I@LIBDIR@/libavl/include
EXTRA_CFLAGS += -I@LIBDIR@/libport/include
EXTRA_CFLAGS += -I@LIBDIR@/libnvpair/include
obj-m := ${MODULE}.o
${MODULE}-objs += zvol.o # Volume emulation interface
${MODULE}-objs += zfs_ioctl.o # /dev/zfs_ioctl interface
${MODULE}-objs += zfs_vfsops.o
${MODULE}-objs += dmu_send.o
+263
View File
@@ -0,0 +1,263 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ftw.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/debug.h>
#include <sys/dmu_ctl.h>
#include <sys/dmu_ctl_impl.h>
/*
 * Create a UNIX stream socket and try to connect it to the socket file
 * named by "path".
 *
 * Returns the connected descriptor on success and 0 on any failure.  Zero
 * (rather than -1) is the failure value so that the result can be returned
 * directly from an nftw() callback, where 0 means "keep walking".
 */
static int try_connect(const char *path)
{
	struct sockaddr_un addr;
	int fd = socket(PF_UNIX, SOCK_STREAM, 0);

	if (fd == -1) {
		perror("socket");
		return 0;
	}

	/*
	 * Descriptor 0 would be indistinguishable from the failure value
	 * above, so it must never be handed out here.
	 */
	VERIFY(fd != 0);

	addr.sun_family = AF_UNIX;
	/* strncpy() does not terminate on truncation; do it by hand. */
	strncpy(addr.sun_path, path, sizeof(addr.sun_path));
	addr.sun_path[sizeof(addr.sun_path) - 1] = '\0';

	if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) != -1)
		return fd;

	close(fd);
	return 0;
}
/*
 * nftw() callback.
 *
 * Ignores every entry that is not a socket whose base name equals SOCKNAME.
 * For a matching entry the result of try_connect() is returned: a non-zero
 * descriptor ends the walk, 0 lets it continue.
 */
static int nftw_cb(const char *fpath, const struct stat *sb, int typeflag,
    struct FTW *ftwbuf)
{
	/* ftwbuf->base is the offset of the file name within fpath. */
	const char *leaf = &fpath[ftwbuf->base];

	if (S_ISSOCK(sb->st_mode) && strcmp(leaf, SOCKNAME) == 0)
		return try_connect(fpath);

	return 0;
}
/*
 * Connect to the control socket.
 *
 * When check_subdirs is false, only "<dir>/" SOCKNAME is tried.  When it is
 * true, the tree rooted at "dir" is walked with nftw() and the first socket
 * named SOCKNAME that accepts a connection is used.
 *
 * Returns the connected descriptor, or -1 if no connection could be made
 * (including allocation failure and nftw() errors).
 */
int dctlc_connect(const char *dir, boolean_t check_subdirs)
{
	int fd;

	if (!check_subdirs) {
		/* room for dir + '/' + SOCKNAME + NUL */
		char *sockpath = malloc(strlen(dir) + strlen(SOCKNAME) + 2);

		if (sockpath == NULL)
			return -1;

		strcpy(sockpath, dir);
		strcat(sockpath, "/" SOCKNAME);

		fd = try_connect(sockpath);
		free(sockpath);
	} else {
		fd = nftw(dir, nftw_cb, 10, FTW_PHYS);
	}

	/* 0 means "nothing found"; translate it to the usual -1. */
	if (fd == 0)
		return -1;

	return fd;
}
/*
 * Shut down both directions of the connection on "fd".  Any error from
 * shutdown() is deliberately ignored.
 *
 * NOTE(review): the descriptor is shut down but not close()d here.  Unless
 * callers close it themselves (not visible from this file), each
 * dctlc_connect()/dctlc_disconnect() pair leaks a descriptor -- confirm.
 */
void dctlc_disconnect(int fd)
{
(void) shutdown(fd, SHUT_RDWR);
}
/*
 * Answer a DCTL_COPYIN request: send the "size" bytes found at the local
 * address carried in the request back over "fd".
 *
 * Returns the result of dctl_send_data().
 */
static int dctl_reply_copyin(int fd, dctl_cmd_t *cmd)
{
	void *src = (void *)(uintptr_t) cmd->u.dcmd_copy.ptr;
	size_t nbytes = cmd->u.dcmd_copy.size;

	return dctl_send_data(fd, src, nbytes);
}
/*
 * Answer a DCTL_COPYINSTR request.
 *
 * The request carries the local address of a string and the size of the
 * peer's destination buffer.  A DCTL_GEN_REPLY is sent whose "size" is the
 * number of string bytes that follow (never more than buffer size - 1) and
 * whose "rc" is 0 if the whole string fits, or ENAMETOOLONG if it had to be
 * truncated.  The string bytes are sent without a terminating NUL.
 *
 * A destination size of 0 is answered with ENAMETOOLONG and no data;
 * computing "buflen - 1" in that case would wrap around and let strnlen()
 * scan without a bound.
 *
 * Returns 0 on success or the error from dctl_send_msg()/dctl_send_data().
 */
static int dctl_reply_copyinstr(int fd, dctl_cmd_t *cmd)
{
	dctl_cmd_t reply;
	char *from;
	size_t buflen, to_copy;
	int error;

	reply.dcmd_msg = DCTL_GEN_REPLY;

	from = (char *)(uintptr_t) cmd->u.dcmd_copy.ptr;
	buflen = cmd->u.dcmd_copy.size;

	if (buflen == 0) {
		/* No room for even the terminator on the other side. */
		to_copy = 0;
		reply.u.dcmd_reply.rc = ENAMETOOLONG;
	} else {
		/* Leave one byte for the NUL the receiver appends. */
		to_copy = strnlen(from, buflen - 1);
		reply.u.dcmd_reply.rc =
		    from[to_copy] == '\0' ? 0 : ENAMETOOLONG;
	}
	reply.u.dcmd_reply.size = to_copy;

	error = dctl_send_msg(fd, &reply);

	if (!error && to_copy > 0)
		error = dctl_send_data(fd, from, to_copy);

	return error;
}
/*
 * Answer a DCTL_COPYOUT request: read "size" bytes from "fd" and store them
 * at the local address carried in the request.
 *
 * Returns the result of dctl_read_data().
 */
static int dctl_reply_copyout(int fd, dctl_cmd_t *cmd)
{
	void *dst = (void *)(uintptr_t) cmd->u.dcmd_copy.ptr;
	size_t nbytes = cmd->u.dcmd_copy.size;

	return dctl_read_data(fd, dst, nbytes);
}
/*
 * Answer a DCTL_FD_READ request: read up to the requested number of bytes
 * from the local descriptor named in the request, then send a
 * DCTL_GEN_REPLY followed by the bytes actually read.
 *
 * In the reply, "rc" is 0 or the errno from read(), and "size" is read()'s
 * return value.
 *
 * Returns 0 on success, ENOMEM if the bounce buffer cannot be allocated, or
 * the error from dctl_send_msg()/dctl_send_data().
 */
static int dctl_reply_fd_read(int fd, dctl_cmd_t *cmd)
{
	dctl_cmd_t reply;
	void *buf;
	int error;
	ssize_t rrc, size = cmd->u.dcmd_fd_io.size;

	/*
	 * malloc(0) is allowed to return NULL, which must not be mistaken
	 * for an out-of-memory condition on a zero-length request.
	 */
	buf = malloc(size != 0 ? (size_t) size : 1);
	if (buf == NULL)
		return ENOMEM;

	rrc = read(cmd->u.dcmd_fd_io.fd, buf, size);

	reply.dcmd_msg = DCTL_GEN_REPLY;
	/* errno is sampled right after read(), before anything can clobber it */
	reply.u.dcmd_reply.rc = rrc == -1 ? errno : 0;
	reply.u.dcmd_reply.size = rrc;

	error = dctl_send_msg(fd, &reply);

	if (!error && rrc > 0)
		error = dctl_send_data(fd, buf, rrc);

	free(buf);
	return error;
}
/*
 * Answer a DCTL_FD_WRITE request: receive the payload from "fd", write it to
 * the local descriptor named in the request, and send a DCTL_GEN_REPLY.
 *
 * In the reply, "rc" is 0 or the errno from write(), and "size" is write()'s
 * return value.
 *
 * Returns 0 on success, ENOMEM if the bounce buffer cannot be allocated, or
 * the error from dctl_read_data()/dctl_send_msg().
 */
static int dctl_reply_fd_write(int fd, dctl_cmd_t *cmd)
{
	dctl_cmd_t reply;
	void *buf;
	int error;
	ssize_t wrc, size = cmd->u.dcmd_fd_io.size;

	/*
	 * malloc(0) is allowed to return NULL, which must not be mistaken
	 * for an out-of-memory condition on a zero-length request.
	 */
	buf = malloc(size != 0 ? (size_t) size : 1);
	if (buf == NULL)
		return ENOMEM;

	error = dctl_read_data(fd, buf, size);
	if (error)
		goto out;

	wrc = write(cmd->u.dcmd_fd_io.fd, buf, size);

	reply.dcmd_msg = DCTL_GEN_REPLY;
	/* errno is sampled right after write(), before anything can clobber it */
	reply.u.dcmd_reply.rc = wrc == -1 ? errno : 0;
	reply.u.dcmd_reply.size = wrc;

	error = dctl_send_msg(fd, &reply);

out:
	free(buf);
	return error;
}
int dctlc_ioctl(int fd, int32_t request, void *arg)
{
int error;
dctl_cmd_t cmd;
ASSERT(fd != 0);
cmd.dcmd_msg = DCTL_IOCTL;
cmd.u.dcmd_ioctl.cmd = request;
cmd.u.dcmd_ioctl.arg = (uintptr_t) arg;
error = dctl_send_msg(fd, &cmd);
while (!error && (error = dctl_read_msg(fd, &cmd)) == 0) {
switch (cmd.dcmd_msg) {
case DCTL_IOCTL_REPLY:
error = cmd.u.dcmd_reply.rc;
goto out;
case DCTL_COPYIN:
error = dctl_reply_copyin(fd, &cmd);
break;
case DCTL_COPYINSTR:
error = dctl_reply_copyinstr(fd, &cmd);
break;
case DCTL_COPYOUT:
error = dctl_reply_copyout(fd, &cmd);
break;
case DCTL_FD_READ:
error = dctl_reply_fd_read(fd, &cmd);
break;
case DCTL_FD_WRITE:
error = dctl_reply_fd_write(fd, &cmd);
break;
default:
fprintf(stderr, "%s(): invalid message "
"received.\n", __func__);
error = EINVAL;
goto out;
}
}
out:
errno = error;
return error ? -1 : 0;
}
+109
View File
@@ -0,0 +1,109 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <stdio.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/dmu_ctl.h>
#include <sys/dmu_ctl_impl.h>
/*
 * Receive one dctl_cmd_t from "fd" into "cmd".
 *
 * Returns 0 on success, EIO if the magic number does not match, ENOTSUP if
 * the protocol version does not match, or the error from dctl_read_data().
 */
int dctl_read_msg(int fd, dctl_cmd_t *cmd)
{
	int error;

	/*
	 * Only the header (magic number and protocol version) is read at
	 * first.  Should dctl_cmd_t shrink in a later protocol version, this
	 * avoids blocking forever waiting for bytes that will never come.
	 */
	error = dctl_read_data(fd, cmd, DCTL_CMD_HEADER_SIZE);
	if (error)
		return error;

	if (cmd->dcmd_magic != DCTL_MAGIC) {
		fprintf(stderr, "%s(): invalid magic number\n", __func__);
		return EIO;
	}

	if (cmd->dcmd_version != DCTL_PROTOCOL_VER) {
		fprintf(stderr, "%s(): invalid protocol version\n", __func__);
		return ENOTSUP;
	}

	/* Header is good: fetch the remainder of the command. */
	return dctl_read_data(fd, (caddr_t) cmd + DCTL_CMD_HEADER_SIZE,
	    sizeof(dctl_cmd_t) - DCTL_CMD_HEADER_SIZE);
}
/*
 * Stamp "cmd" with the protocol magic number and version, then send the
 * whole structure over "fd".
 *
 * Returns the result of dctl_send_data().
 */
int dctl_send_msg(int fd, dctl_cmd_t *cmd)
{
	const size_t msg_size = sizeof(dctl_cmd_t);

	cmd->dcmd_version = DCTL_PROTOCOL_VER;
	cmd->dcmd_magic = DCTL_MAGIC;

	return dctl_send_data(fd, cmd, msg_size);
}
/*
 * Receive exactly "size" bytes from "fd" into "ptr", calling recv() as many
 * times as needed.
 *
 * Returns 0 once all bytes have arrived, ECONNRESET if the peer closed the
 * connection first, or the errno from recv().  EINTR is retried.
 */
int dctl_read_data(int fd, void *ptr, size_t size)
{
	char *base = ptr;
	size_t done = 0;

	while (done < size) {
		ssize_t got = recv(fd, base + done, size - done, 0);

		if (got > 0) {
			done += got;
			continue;
		}

		/* A zero-length read means the peer closed the socket. */
		if (got == 0)
			return ECONNRESET;

		/* got == -1: retry if interrupted, otherwise give up. */
		if (errno != EINTR)
			return errno;
	}

	return 0;
}
/*
 * Send exactly "size" bytes from "ptr" over "fd".
 *
 * send() on a stream socket may accept fewer bytes than requested (for
 * example when a signal arrives after part of the data has been queued), so
 * the call is repeated until everything has been handed to the kernel.
 * MSG_NOSIGNAL keeps a closed peer from raising SIGPIPE.
 *
 * Returns 0 on success and EIO on any failure.  EINTR is retried.
 */
int dctl_send_data(int fd, const void *ptr, size_t size)
{
	const char *base = ptr;
	size_t sent = 0;

	for (;;) {
		ssize_t rc = send(fd, base + sent, size - sent, MSG_NOSIGNAL);

		if (rc == -1) {
			if (errno == EINTR)
				continue;
			return EIO;
		}

		sent += (size_t) rc;
		if (sent >= size)
			return 0;

		/* No progress with data still pending: do not spin. */
		if (rc == 0)
			return EIO;
	}
}
+476
View File
@@ -0,0 +1,476 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <limits.h>
#include <errno.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/debug.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/list.h>
#include <sys/cred.h>
#include <sys/dmu_ctl.h>
#include <sys/dmu_ctl_impl.h>
static dctl_sock_info_t ctl_sock = {
.dsi_mtx = PTHREAD_MUTEX_INITIALIZER,
.dsi_fd = -1
};
static int dctl_create_socket_common();
/*
* Routines from zfs_ioctl.c
*/
extern int zfs_ioctl_init();
extern int zfs_ioctl_fini();
extern int zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr,
int *rvalp);
/*
* We can't simply put the client file descriptor in wthr_info_t because we
* have no way of accessing it from the DMU code without extensive
* modifications.
*
* Therefore each worker thread will have its own global thread-specific
* client_fd variable.
*/
static __thread int client_fd = -1;
/*
 * Copy "size" bytes from address "src" in the client process into the local
 * buffer "dest".
 *
 * A DCTL_COPYIN request is sent on this thread's client connection and the
 * client answers with the raw bytes.
 *
 * Returns 0 on success and EFAULT if either the request or the data
 * transfer fails.
 */
int dctls_copyin(const void *src, void *dest, size_t size)
{
	dctl_cmd_t req;

	VERIFY(client_fd >= 0);

	req.dcmd_msg = DCTL_COPYIN;
	req.u.dcmd_copy.ptr = (uintptr_t) src;
	req.u.dcmd_copy.size = size;

	if (dctl_send_msg(client_fd, &req) == 0 &&
	    dctl_read_data(client_fd, dest, size) == 0)
		return 0;

	return EFAULT;
}
/*
 * Copy a NUL-terminated string from address "from" in the client process
 * into the local buffer "to", which holds "max" bytes.
 *
 * A DCTL_COPYINSTR request is sent; the client answers with a
 * DCTL_GEN_REPLY giving the number of string bytes that follow and a return
 * code.  "to" is always NUL-terminated on the paths that reach the final
 * return, and if "len" is not NULL it receives the number of bytes stored
 * including the terminator.
 *
 * Returns ENAMETOOLONG if max is 0, EFAULT on any transport or protocol
 * failure (including a reply claiming max or more bytes), and otherwise the
 * return code supplied by the client.
 */
int dctls_copyinstr(const char *from, char *to, size_t max, size_t *len)
{
	dctl_cmd_t msg;
	size_t copied;

	VERIFY(client_fd >= 0);

	/* "max" is unsigned, so zero is the only degenerate value. */
	if (max == 0)
		return ENAMETOOLONG;

	msg.dcmd_msg = DCTL_COPYINSTR;
	msg.u.dcmd_copy.ptr = (uintptr_t) from;
	msg.u.dcmd_copy.size = max;

	if (dctl_send_msg(client_fd, &msg) != 0)
		return EFAULT;

	if (dctl_read_msg(client_fd, &msg) != 0)
		return EFAULT;

	if (msg.dcmd_msg != DCTL_GEN_REPLY)
		return EFAULT;

	copied = msg.u.dcmd_reply.size;

	/* Never trust the peer to leave room for the terminator. */
	if (copied >= max)
		return EFAULT;

	if (copied > 0)
		if (dctl_read_data(client_fd, to, copied) != 0)
			return EFAULT;

	to[copied] = '\0';

	if (len != NULL)
		*len = copied + 1;

	return msg.u.dcmd_reply.rc;
}
/*
 * Copy "size" bytes from the local buffer "src" to address "dest" in the
 * client process.
 *
 * A DCTL_COPYOUT request is sent on this thread's client connection,
 * immediately followed by the raw bytes.
 *
 * Returns 0 on success and EFAULT if either send fails.
 */
int dctls_copyout(const void *src, void *dest, size_t size)
{
	dctl_cmd_t req;

	VERIFY(client_fd >= 0);

	req.dcmd_msg = DCTL_COPYOUT;
	req.u.dcmd_copy.ptr = (uintptr_t) dest;
	req.u.dcmd_copy.size = size;

	if (dctl_send_msg(client_fd, &req) == 0 &&
	    dctl_send_data(client_fd, src, size) == 0)
		return 0;

	return EFAULT;
}
/*
 * Ask the client to read up to "len" bytes from its descriptor "fd" and
 * deliver them into the local buffer "buf".
 *
 * On success *residp is set to the number of requested bytes that were not
 * delivered (len minus the amount actually read).
 *
 * Returns 0 on success, the transport error from sending or receiving a
 * message, EIO on an unexpected reply type or on a reply that claims more
 * data than was asked for, or the error code reported by the client's
 * read().
 */
int dctls_fd_read(int fd, void *buf, ssize_t len, ssize_t *residp)
{
	dctl_cmd_t msg;
	uint64_t dsize;
	int error;

	VERIFY(client_fd >= 0);

	msg.dcmd_msg = DCTL_FD_READ;
	msg.u.dcmd_fd_io.fd = fd;
	msg.u.dcmd_fd_io.size = len;

	if ((error = dctl_send_msg(client_fd, &msg)) != 0)
		return error;

	if ((error = dctl_read_msg(client_fd, &msg)) != 0)
		return error;

	if (msg.dcmd_msg != DCTL_GEN_REPLY)
		return EIO;

	if (msg.u.dcmd_reply.rc != 0)
		return msg.u.dcmd_reply.rc;

	dsize = msg.u.dcmd_reply.size;

	/*
	 * The size comes from the peer; accepting more than "len" bytes
	 * would write past the end of "buf".
	 */
	if (dsize > (uint64_t) len)
		return EIO;

	if (dsize > 0)
		error = dctl_read_data(client_fd, buf, dsize);

	*residp = len - dsize;

	return error;
}
/*
 * Ask the client to write the "len" bytes at "src" to its descriptor "fd".
 *
 * The request is followed by the payload; the client then answers with a
 * DCTL_GEN_REPLY carrying the result of its write().
 *
 * Returns 0 on success, the transport error from any of the three
 * exchanges, EIO on an unexpected reply type or a short write, or the error
 * code reported by the client's write().
 */
int dctls_fd_write(int fd, const void *src, ssize_t len)
{
	dctl_cmd_t msg;
	int error;

	VERIFY(client_fd >= 0);

	msg.dcmd_msg = DCTL_FD_WRITE;
	msg.u.dcmd_fd_io.fd = fd;
	msg.u.dcmd_fd_io.size = len;

	error = dctl_send_msg(client_fd, &msg);
	if (error)
		return error;

	error = dctl_send_data(client_fd, src, len);
	if (error)
		return error;

	error = dctl_read_msg(client_fd, &msg);
	if (error)
		return error;

	if (msg.dcmd_msg != DCTL_GEN_REPLY)
		return EIO;

	if (msg.u.dcmd_reply.rc != 0)
		return msg.u.dcmd_reply.rc;

	/*
	 * The original upstream code does not check whether residp == len,
	 * so a short write has to be turned into an error here.
	 */
	return msg.u.dcmd_reply.size == len ? 0 : EIO;
}
/*
 * Serve one client connection.
 *
 * The thread-local client_fd is pointed at the connection for the duration
 * of the call so that the dctls_*() helpers can reach the client.  Each
 * DCTL_IOCTL message is passed to zfsdev_ioctl() and answered with a
 * DCTL_IOCTL_REPLY holding its return value.  The loop ends on a receive or
 * send failure or on any other message type, after which the socket is
 * closed and client_fd is reset to -1.
 */
static void dctl_handle_conn(int sock_fd)
{
	dctl_cmd_t cmd;
	dev_t dev = { 0 };

	client_fd = sock_fd;

	for (;;) {
		int ioctl_rc;

		if (dctl_read_msg(sock_fd, &cmd) != 0)
			break;

		if (cmd.dcmd_msg != DCTL_IOCTL) {
			fprintf(stderr, "%s(): unexpected message type.\n",
			    __func__);
			break;
		}

		ioctl_rc = zfsdev_ioctl(dev, cmd.u.dcmd_ioctl.cmd,
		    (intptr_t) cmd.u.dcmd_ioctl.arg, 0, NULL, NULL);

		/* Reuse the request structure for the reply. */
		cmd.dcmd_msg = DCTL_IOCTL_REPLY;
		cmd.u.dcmd_reply.rc = ioctl_rc;

		if (dctl_send_msg(sock_fd, &cmd) != 0)
			break;
	}

	close(sock_fd);
	client_fd = -1;
}
/*
 * Main worker thread loop.
 *
 * "arg" is the thread's wthr_info_t.  While holding ctl_sock.dsi_mtx the
 * thread polls the listening socket with a one second timeout, accepts a
 * connection when one is pending, then drops the mutex while the connection
 * is being served.  The loop ends when the thread's exit flag is set or
 * when the listening socket cannot be recreated after a poll() failure.
 *
 * Always returns NULL.
 */
static void *dctl_thread(void *arg)
{
	wthr_info_t *self = arg;
	struct pollfd pfd;

	pfd.events = POLLIN;

	pthread_mutex_lock(&ctl_sock.dsi_mtx);

	while (!self->wthr_exit) {
		int nready;
		int conn_fd;

		/* Clean-up dead threads */
		dctl_thr_join();

		/* The listening descriptor may be replaced at any time */
		pfd.fd = ctl_sock.dsi_fd;

		/* Wait at most one second for a connection */
		nready = poll(&pfd, 1, 1000);

		if (nready == 0)
			continue;
		if (nready == -1 && errno == EINTR)
			continue;

		/* The exit flag may have been raised while we were waiting */
		if (self->wthr_exit)
			break;

		if (nready == -1) {
			/* Unknown error, let's try to recreate the socket */
			close(ctl_sock.dsi_fd);
			ctl_sock.dsi_fd = -1;

			if (dctl_create_socket_common() != 0)
				break;

			continue;
		}

		ASSERT(nready == 1);

		if (pfd.revents == 0)
			continue;

		ASSERT(pfd.revents == POLLIN);

		/* A connection should now be waiting to be accepted. */
		conn_fd = accept(ctl_sock.dsi_fd, NULL, NULL);

		/* Many possible errors here, we'll just retry */
		if (conn_fd == -1)
			continue;

		/*
		 * Serving the request can take a very long time (hours
		 * even), so release the socket to the other threads while
		 * this one is busy.
		 */
		pthread_mutex_unlock(&ctl_sock.dsi_mtx);

		dctl_thr_rebalance(self, B_FALSE);
		dctl_handle_conn(conn_fd);
		dctl_thr_rebalance(self, B_TRUE);

		pthread_mutex_lock(&ctl_sock.dsi_mtx);
	}

	pthread_mutex_unlock(&ctl_sock.dsi_mtx);

	dctl_thr_die(self);

	return NULL;
}
/*
 * Create, bind and start listening on the control socket at
 * ctl_sock.dsi_path, removing any stale socket file first.
 *
 * On success ctl_sock.dsi_fd is the listening descriptor and 0 is returned.
 * On failure the errno of the failing call is returned; if the socket was
 * already created it is left open in ctl_sock.dsi_fd.
 */
static int dctl_create_socket_common()
{
	dctl_sock_info_t *s = &ctl_sock;
	const size_t maxlen = sizeof(s->dsi_addr.sun_path) - 1;
	int saved_errno;

	ASSERT(s->dsi_fd == -1);

	/* Remove a stale socket file, if any; failure is irrelevant. */
	unlink(s->dsi_path);

	s->dsi_fd = socket(PF_UNIX, SOCK_STREAM, 0);
	if (s->dsi_fd == -1) {
		saved_errno = errno;
		perror("socket");
		return saved_errno;
	}

	s->dsi_addr.sun_family = AF_UNIX;
	strncpy(s->dsi_addr.sun_path, s->dsi_path, maxlen);
	s->dsi_addr.sun_path[maxlen] = '\0';

	if (bind(s->dsi_fd, (struct sockaddr *) &s->dsi_addr,
	    sizeof(s->dsi_addr)) != 0) {
		saved_errno = errno;
		perror("bind");
		return saved_errno;
	}

	if (listen(s->dsi_fd, LISTEN_BACKLOG) == 0)
		return 0;

	saved_errno = errno;
	perror("listen");
	/* bind() created the socket file; take it away again. */
	unlink(s->dsi_path);
	return saved_errno;
}
/*
 * Build the socket path "<cfg_dir>/" SOCKNAME, make sure the directory
 * exists and create the listening control socket.
 *
 * Returns 0 on success, ENAMETOOLONG if the path does not fit in sun_path,
 * ENOMEM if the path cannot be allocated, or the error from
 * dctl_create_socket_common().  On failure ctl_sock is put back into its
 * initial state (dsi_path == NULL, dsi_fd == -1) so a later attempt does not
 * see a dangling path pointer.
 */
static int dctl_create_socket(const char *cfg_dir)
{
	dctl_sock_info_t *s = &ctl_sock;
	size_t pathsize;
	int error;

	ASSERT(s->dsi_path == NULL);
	ASSERT(s->dsi_fd == -1);

	/* room for cfg_dir + '/' + SOCKNAME + NUL */
	pathsize = strlen(cfg_dir) + strlen(SOCKNAME) + 2;
	if (pathsize > sizeof(s->dsi_addr.sun_path))
		return ENAMETOOLONG;

	s->dsi_path = malloc(pathsize);
	if (s->dsi_path == NULL)
		return ENOMEM;

	strcpy(s->dsi_path, cfg_dir);
	strcat(s->dsi_path, "/" SOCKNAME);

	/*
	 * For convenience, create the directory in case it doesn't exist.
	 * We don't care about errors here.
	 */
	(void) mkdir(cfg_dir, 0770);

	error = dctl_create_socket_common();

	if (error) {
		free(s->dsi_path);
		/* Do not leave a dangling pointer behind. */
		s->dsi_path = NULL;

		if (s->dsi_fd != -1) {
			close(s->dsi_fd);
			s->dsi_fd = -1;
		}
	}

	return error;
}
/*
 * Close the listening control socket, remove its socket file and release
 * the path string.
 *
 * ctl_sock is returned to its initial state (dsi_path == NULL,
 * dsi_fd == -1), which is what dctl_create_socket() asserts on entry.
 */
static void dctl_destroy_socket()
{
	dctl_sock_info_t *s = &ctl_sock;

	ASSERT(s->dsi_path != NULL);
	ASSERT(s->dsi_fd != -1);

	close(s->dsi_fd);
	s->dsi_fd = -1;

	unlink(s->dsi_path);
	free(s->dsi_path);
	/* Do not leave a dangling pointer behind. */
	s->dsi_path = NULL;
}
/*
* Initialize the DMU userspace control interface.
* This should be called after kernel_init().
*
* Note that only very rarely we have more than a couple of simultaneous
* lzfs/lzpool connections. Since the thread pool grows automatically when all
* threads are busy, a good value for min_thr and max_free_thr is 2.
*/
int dctl_server_init(const char *cfg_dir, int min_thr, int max_free_thr)
{
int error;
ASSERT(min_thr > 0);
ASSERT(max_free_thr >= min_thr);
error = zfs_ioctl_init();
if (error)
return error;
error = dctl_create_socket(cfg_dir);
if (error) {
(void) zfs_ioctl_fini();
return error;
}
error = dctl_thr_pool_create(min_thr, max_free_thr, dctl_thread);
if (error) {
(void) zfs_ioctl_fini();
dctl_destroy_socket();
return error;
}
return 0;
}
/*
 * Terminate control interface.
 * This should be called after closing all objsets, but before calling
 * kernel_fini().
 * May return EBUSY if the SPA is busy.
 *
 * The thread pool is stopped first, then the control socket is destroyed,
 * and the value returned is that of zfs_ioctl_fini().
 *
 * Thread pool destruction can take a while due to poll()
 * timeout or due to a thread being busy (e.g. a backup is being taken).
 */
int dctl_server_fini()
{
	int rc;

	dctl_thr_pool_stop();
	dctl_destroy_socket();

	rc = zfs_ioctl_fini();
	return rc;
}
+253
View File
@@ -0,0 +1,253 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <stdlib.h>
#include <stddef.h>
#include <time.h>
#include <pthread.h>
#include <errno.h>
#include <sys/list.h>
#include <sys/debug.h>
#include <sys/dmu_ctl.h>
#include <sys/dmu_ctl_impl.h>
static dctl_thr_info_t thr_pool = {
.dti_mtx = PTHREAD_MUTEX_INITIALIZER
};
/*
 * Start "n" worker threads and add them to the pool.
 * Callers must acquire thr_pool.dti_mtx first.
 *
 * Every new thread begins life marked free and not exiting.  Returns 0 if
 * all threads were started, ENOMEM if a thread descriptor could not be
 * allocated, or the error from pthread_create().  Threads started before a
 * failure remain in the pool.
 */
static int dctl_thr_create(int n)
{
	dctl_thr_info_t *pool = &thr_pool;
	int remaining = n;

	while (remaining-- > 0) {
		wthr_info_t *w = malloc(sizeof(wthr_info_t));
		int rc;

		if (w == NULL)
			return ENOMEM;

		w->wthr_free = B_TRUE;
		w->wthr_exit = B_FALSE;

		rc = pthread_create(&w->wthr_id, NULL, pool->dti_thr_func, w);
		if (rc != 0) {
			free(w);
			return rc;
		}

		list_insert_tail(&pool->dti_list, w);
		pool->dti_free++;
	}

	return 0;
}
/*
 * Mark the thread as dead.
 * Must be called right before exiting the main thread function.
 *
 * The exit flag is set, the thread is accounted as no longer free, and its
 * descriptor is moved from the active list to the join list, where
 * dctl_thr_join() will pick it up.
 */
void dctl_thr_die(wthr_info_t *thr)
{
	thr->wthr_exit = B_TRUE;
	dctl_thr_rebalance(thr, B_FALSE);

	pthread_mutex_lock(&thr_pool.dti_mtx);

	list_remove(&thr_pool.dti_list, thr);
	list_insert_tail(&thr_pool.dti_join_list, thr);

	pthread_mutex_unlock(&thr_pool.dti_mtx);
}
/*
 * Clean-up dead threads.
 *
 * Every descriptor on the join list is removed, its thread is joined and
 * the descriptor is freed.  The pool mutex is held throughout.
 */
void dctl_thr_join()
{
	dctl_thr_info_t *pool = &thr_pool;

	pthread_mutex_lock(&pool->dti_mtx);

	for (;;) {
		wthr_info_t *dead = list_head(&pool->dti_join_list);

		if (dead == NULL)
			break;

		list_remove(&pool->dti_join_list, dead);

		/* A thread must never end up joining itself. */
		ASSERT(!pthread_equal(dead->wthr_id, pthread_self()));

		/*
		 * Threads on this list should already have finished, so
		 * the join is not expected to block.  An error from
		 * pthread_join() can only mean a programming mistake.
		 */
		VERIFY(pthread_join(dead->wthr_id, NULL) == 0);

		ASSERT(dead->wthr_exit);
		ASSERT(!dead->wthr_free);

		free(dead);
	}

	pthread_mutex_unlock(&pool->dti_mtx);
}
/*
 * Record whether "thr" is free and keep the pool's free-thread counter in
 * step with that change.
 *
 * Callers must acquire thr_pool.dti_mtx first.
 */
static void dctl_thr_adjust_free(wthr_info_t *thr, boolean_t set_free)
{
	dctl_thr_info_t *pool = &thr_pool;

	ASSERT(pool->dti_free >= 0);

	if (set_free) {
		/* busy -> free */
		if (!thr->wthr_free)
			pool->dti_free++;
	} else if (thr->wthr_free) {
		/* free -> busy */
		pool->dti_free--;
	}

	ASSERT(pool->dti_free >= 0);

	thr->wthr_free = set_free;
}
/*
 * Rebalance threads. Also adjusts the free status of the thread.
 *
 * The thread's exit flag is raised if the pool is shutting down or if the
 * number of free threads is above the limit.  A thread that is exiting is
 * always accounted as not free, whatever "set_free" says.  If the pool is
 * not shutting down and no free thread is left afterwards, one new thread
 * is started (its result is not checked).
 */
void dctl_thr_rebalance(wthr_info_t *thr, boolean_t set_free)
{
	dctl_thr_info_t *pool = &thr_pool;

	pthread_mutex_lock(&pool->dti_mtx);

	if (pool->dti_exit || pool->dti_free > pool->dti_max_free)
		thr->wthr_exit = B_TRUE;

	dctl_thr_adjust_free(thr, thr->wthr_exit ? B_FALSE : set_free);

	if (!pool->dti_exit && pool->dti_free == 0)
		dctl_thr_create(1);

	pthread_mutex_unlock(&pool->dti_mtx);
}
/*
 * Stop the thread pool.
 *
 * Every active thread is flagged to exit and accounted as not free; the
 * caller then sleeps in 50ms steps, joining finished threads, until the
 * active list is empty.  Both lists are destroyed at the end.
 *
 * This can take a while since it actually waits for all threads to exit.
 */
void dctl_thr_pool_stop()
{
	dctl_thr_info_t *pool = &thr_pool;
	wthr_info_t *thr;
	struct timespec nap;

	pthread_mutex_lock(&pool->dti_mtx);

	ASSERT(!pool->dti_exit);
	pool->dti_exit = B_TRUE;

	/* Let's flag the threads first */
	for (thr = list_head(&pool->dti_list); thr != NULL;
	    thr = list_next(&pool->dti_list, thr)) {
		thr->wthr_exit = B_TRUE;
		dctl_thr_adjust_free(thr, B_FALSE);
	}

	pthread_mutex_unlock(&pool->dti_mtx);

	/* Now let's wait for them to exit */
	nap.tv_sec = 0;
	nap.tv_nsec = 50000000; /* 50ms */

	for (;;) {
		nanosleep(&nap, NULL);

		pthread_mutex_lock(&pool->dti_mtx);
		thr = list_head(&pool->dti_list);
		pthread_mutex_unlock(&pool->dti_mtx);

		dctl_thr_join();

		if (thr == NULL)
			break;
	}

	ASSERT(pool->dti_free == 0);

	ASSERT(list_is_empty(&pool->dti_list));
	ASSERT(list_is_empty(&pool->dti_join_list));

	list_destroy(&pool->dti_list);
	list_destroy(&pool->dti_join_list);
}
/*
 * Create thread pool.
 *
 * The pool parameters and both thread lists are set up, then min_thr worker
 * threads running thr_func are started.
 *
 * If at least one thread creation fails, it will stop all previous
 * threads and return a non-zero value.
 */
int dctl_thr_pool_create(int min_thr, int max_free_thr,
    thr_func_t *thr_func)
{
	dctl_thr_info_t *pool = &thr_pool;
	int error;

	ASSERT(pool->dti_free == 0);

	/* Pool-wide settings */
	pool->dti_thr_func = thr_func;
	pool->dti_exit = B_FALSE;
	pool->dti_max_free = max_free_thr;
	pool->dti_min = min_thr;

	/* Active threads and threads waiting to be joined */
	list_create(&pool->dti_list, sizeof(wthr_info_t),
	    offsetof(wthr_info_t, wthr_node));
	list_create(&pool->dti_join_list, sizeof(wthr_info_t),
	    offsetof(wthr_info_t, wthr_node));

	pthread_mutex_lock(&pool->dti_mtx);
	error = dctl_thr_create(min_thr);
	pthread_mutex_unlock(&pool->dti_mtx);

	if (error != 0)
		dctl_thr_pool_stop();

	return error;
}
File diff suppressed because it is too large Load Diff
+1
View File
@@ -0,0 +1 @@
subdir-m += sys
@@ -0,0 +1 @@
DISTFILES = dmu_ctl.h dmu_ctl_impl.h
+71
View File
@@ -0,0 +1,71 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_CTL_H
#define _SYS_DMU_CTL_H
#include <sys/types.h>
/* Default directory where the clients search for sockets to connect */
#define DMU_CTL_DEFAULT_DIR "/var/run/zfs/udmu"
/*
* These functions are called by the server process.
*
* kernel_init() must be called before dctl_server_init().
* kernel_fini() must not be called before dctl_server_fini().
*
* All objsets must be closed and object references be released before calling
* dctl_server_fini(), otherwise it will return EBUSY.
*
* Note: On Solaris, it is highly recommended to either catch or ignore the
* SIGPIPE signal, otherwise the server process will die if the client is
* killed.
*/
/*
 * Start and stop the control server.  Both return an int status; the
 * comment above states that dctl_server_fini() returns EBUSY while objsets
 * or object references are still held.
 */
int dctl_server_init(const char *cfg_dir, int min_threads,
    int max_free_threads);
/* "(void)" makes this a real prototype, so stray arguments are diagnosed. */
int dctl_server_fini(void);
/*
* The following functions are called by the DMU from the server process context
* (in the worker threads).
*/
int dctls_copyin(const void *src, void *dest, size_t size);
int dctls_copyinstr(const char *from, char *to, size_t max,
size_t *len);
int dctls_copyout(const void *src, void *dest, size_t size);
int dctls_fd_read(int fd, void *buf, ssize_t len, ssize_t *residp);
int dctls_fd_write(int fd, const void *src, ssize_t len);
/*
* These functions are called by the client process (libzfs).
*/
int dctlc_connect(const char *dir, boolean_t check_subdirs);
void dctlc_disconnect(int fd);
int dctlc_ioctl(int fd, int32_t request, void *arg);
#endif
@@ -0,0 +1,144 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_CTL_IMPL_H
#define _SYS_DMU_CTL_IMPL_H
#include <sys/list.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <pthread.h>
/* NOTE(review): presumably the file name of the control socket -- confirm. */
#define SOCKNAME "dmu_socket"
#define DCTL_PROTOCOL_VER 1
/*
* The value is larger than LLONG_MAX, so despite the "ll" suffix this
* hexadecimal constant has type unsigned long long.
*/
#define DCTL_MAGIC 0xdc71b1070c01dc71ll
/* Message types */
/*
* No explicit values are given, so the enumerators are numbered 0 through 7
* in declaration order.
* NOTE(review): presumably these are the values carried in dcmd_msg, in which
* case reordering or inserting entries changes the wire protocol -- confirm.
*/
enum {
DCTL_IOCTL,
DCTL_IOCTL_REPLY,
DCTL_COPYIN,
DCTL_COPYINSTR,
DCTL_COPYOUT,
DCTL_FD_READ,
DCTL_FD_WRITE,
DCTL_GEN_REPLY /* generic reply */
};
/* On-the-wire message */
/*
* Every field has an explicit width and the padding is spelled out by hand:
* the leading fields (magic, version, msg, pad) add up to 16 bytes and each
* member of the union adds up to 16 bytes as well.
* NOTE(review): no byte-order conversion is visible in this header; confirm
* whether both peers are assumed to share endianness.
*/
typedef struct dctl_cmd {
uint64_t dcmd_magic; /* NOTE(review): presumably DCTL_MAGIC -- confirm */
int8_t dcmd_version; /* NOTE(review): presumably DCTL_PROTOCOL_VER -- confirm */
int8_t dcmd_msg; /* NOTE(review): presumably a DCTL_* message type -- confirm */
uint8_t dcmd_pad[6]; /* explicit padding up to a 16-byte header */
union {
struct dcmd_ioctl {
uint64_t arg;
int32_t cmd;
uint8_t pad[4];
} dcmd_ioctl;
struct dcmd_copy_req {
uint64_t ptr;
uint64_t size;
} dcmd_copy;
struct dcmd_fd_req {
int64_t size;
int32_t fd;
uint8_t pad[4];
} dcmd_fd_io;
struct dcmd_reply {
uint64_t size; /* used by reply to DCTL_COPYINSTR,
DCTL_FD_READ and DCTL_FD_WRITE */
int32_t rc; /* return code */
uint8_t pad[4];
} dcmd_reply;
} u;
} dctl_cmd_t;
/*
* Nine bytes: the size of dcmd_magic plus one more byte (dcmd_version is one
* byte wide).
* NOTE(review): presumably the prefix that is validated before the rest of
* the message is trusted -- confirm against the messaging code.
*/
#define DCTL_CMD_HEADER_SIZE (sizeof(uint64_t) + sizeof(uint8_t))
/*
* The following definitions are only used by the server code.
*/
#define LISTEN_BACKLOG 5
/* Worker thread data */
typedef struct wthr_info {
list_node_t wthr_node; /* list linkage; NOTE(review): presumably for the
pool's dti_list / dti_join_list -- confirm */
pthread_t wthr_id; /* pthread identifier of this worker */
boolean_t wthr_exit; /* termination flag */
boolean_t wthr_free; /* NOTE(review): presumably set while the worker is
idle (see dti_free) -- confirm */
} wthr_info_t;
/* Control socket data */
typedef struct dctl_sock_info {
pthread_mutex_t dsi_mtx; /* NOTE(review): which fields this protects is not
visible in this header -- confirm */
char *dsi_path; /* NOTE(review): ownership of this string is not visible
here -- confirm who frees it */
struct sockaddr_un dsi_addr; /* UNIX-domain socket address */
int dsi_fd; /* NOTE(review): presumably the listening socket -- confirm */
} dctl_sock_info_t;
typedef void *thr_func_t(void *);
/* Thread pool data */
typedef struct dctl_thr_info {
thr_func_t *dti_thr_func; /* thread function, of type void *(void *) */
pthread_mutex_t dti_mtx; /* protects the thread lists and dti_free */
list_t dti_list; /* list of threads in the thread pool */
list_t dti_join_list; /* list of threads that are waiting to be
joined */
int dti_free; /* number of free worker threads */
int dti_min; /* NOTE(review): presumably the min_thr passed to
dctl_thr_pool_create() -- confirm */
int dti_max_free; /* NOTE(review): presumably the max_free_thr passed to
dctl_thr_pool_create() -- confirm */
boolean_t dti_exit; /* global termination flag */
} dctl_thr_info_t;
/* Messaging functions */
int dctl_read_msg(int fd, dctl_cmd_t *cmd);
int dctl_send_msg(int fd, dctl_cmd_t *cmd);
int dctl_read_data(int fd, void *ptr, size_t size);
int dctl_send_data(int fd, const void *ptr, size_t size);
/* Thread pool functions */
/*
 * dctl_thr_pool_create() returns an int status; thr_func is the function
 * each pool thread runs.
 */
int dctl_thr_pool_create(int min_thr, int max_free_thr,
    thr_func_t *thr_func);
/*
 * These two take no arguments.  "(void)" turns the declarations into real
 * prototypes so that a call with arguments is rejected at compile time.
 */
void dctl_thr_pool_stop(void);
void dctl_thr_join(void);
void dctl_thr_die(wthr_info_t *thr);
void dctl_thr_rebalance(wthr_info_t *thr, boolean_t set_free);
#endif
+249
View File
@@ -0,0 +1,249 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "@(#)rrwlock.c 1.1 07/10/24 SMI"
#include <sys/refcount.h>
#include <sys/rrwlock.h>
/*
* This file contains the implementation of a re-entrant read
* reader/writer lock (aka "rrwlock").
*
* This is a normal reader/writer lock with the additional feature
* of allowing threads who have already obtained a read lock to
* re-enter another read lock (re-entrant read) - even if there are
* waiting writers.
*
* Callers who have not obtained a read lock give waiting writers priority.
*
* The rrwlock_t lock does not allow re-entrant writers, nor does it
* allow a re-entrant mix of reads and writes (that is, it does not
* allow a caller who has already obtained a read lock to be able to
* then grab a write lock without first dropping all read locks, and
* vice versa).
*
* The rrwlock_t uses tsd (thread specific data) to keep a list of
* nodes (rrw_node_t), where each node keeps track of which specific
* lock (rrw_node_t::rn_rrl) the thread has grabbed. Since re-entering
* should be rare, a thread that grabs multiple reads on the same rrwlock_t
* will store multiple rrw_node_ts of the same 'rn_rrl'. Nodes on the
* tsd list can represent a different rrwlock_t. This allows a thread
* to enter multiple and unique rrwlock_ts for read locks at the same time.
*
* Since using tsd exposes some overhead, the rrwlock_t only needs to
* keep tsd data when writers are waiting. If no writers are waiting, then
* a reader just bumps the anonymous read count (rr_anon_rcount) - no tsd
* is needed. Once a writer attempts to grab the lock, readers then
* keep tsd data and bump the linked readers count (rr_linked_rcount).
*
* If there are waiting writers and there are anonymous readers, then a
* reader doesn't know if it is a re-entrant lock. But since it may be one,
* we allow the read to proceed (otherwise it could deadlock). Since once
* waiting writers are active, readers no longer bump the anonymous count,
* the anonymous readers will eventually flush themselves out. At this point,
* readers will be able to tell if they are a re-entrant lock (have a
* rrw_node_t entry for the lock) or not. If they are a re-entrant lock, then
* we must let them proceed. If they are not, then the reader blocks for the
* waiting writers. Hence, we do not starve writers.
*/
/* global key for TSD */
uint_t rrw_tsd_key;
/*
* One entry of a thread's tsd list: records a single read hold that the
* thread has on one rrwlock_t.
*/
typedef struct rrw_node {
struct rrw_node *rn_next; /* next node on this thread's list, or NULL */
rrwlock_t *rn_rrl; /* the lock this node stands for */
} rrw_node_t;
/*
 * Return the first node on the calling thread's tsd list that refers to
 * 'rrl', or NULL if there is none.  The list is not consulted at all when
 * the lock has no linked readers.
 */
static rrw_node_t *
rrn_find(rrwlock_t *rrl)
{
	rrw_node_t *cur;

	if (refcount_count(&rrl->rr_linked_rcount) == 0)
		return (NULL);

	cur = tsd_get(rrw_tsd_key);
	while (cur != NULL && cur->rn_rrl != rrl)
		cur = cur->rn_next;

	return (cur);
}
/*
 * Allocate a node for 'rrl' and push it onto the front of the calling
 * thread's tsd list.
 */
static void
rrn_add(rrwlock_t *rrl)
{
	rrw_node_t *node = kmem_alloc(sizeof (*node), KM_SLEEP);

	/* The current list head becomes the new node's successor. */
	node->rn_next = tsd_get(rrw_tsd_key);
	node->rn_rrl = rrl;

	VERIFY(tsd_set(rrw_tsd_key, node) == 0);
}
/*
 * If the calling thread has a node recorded for 'rrl', unlink the first
 * such node from the thread's tsd list, free it and return B_TRUE;
 * otherwise return B_FALSE.
 *
 * The list is not walked at all when the lock has no linked readers.
 */
static boolean_t
rrn_find_and_remove(rrwlock_t *rrl)
{
	rrw_node_t *rn;
	rrw_node_t *prev = NULL;

	/* This function returns a boolean_t, so report B_FALSE, not NULL. */
	if (refcount_count(&rrl->rr_linked_rcount) == 0)
		return (B_FALSE);

	for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) {
		if (rn->rn_rrl == rrl) {
			/*
			 * A node in the middle is bypassed through its
			 * predecessor; removing the head means storing the
			 * new head back into the tsd slot.
			 */
			if (prev)
				prev->rn_next = rn->rn_next;
			else
				VERIFY(tsd_set(rrw_tsd_key, rn->rn_next) == 0);
			kmem_free(rn, sizeof (*rn));
			return (B_TRUE);
		}
		prev = rn;
	}
	return (B_FALSE);
}
/*
 * Put 'rrl' into its initial state: no writer, no writer waiting, and both
 * reader counts created.
 */
void
rrw_init(rrwlock_t *rrl)
{
	/* Synchronization primitives. */
	mutex_init(&rrl->rr_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&rrl->rr_cv, NULL, CV_DEFAULT, NULL);

	/* Writer state. */
	rrl->rr_writer_wanted = B_FALSE;
	rrl->rr_writer = NULL;

	/* Reader accounting. */
	refcount_create(&rrl->rr_anon_rcount);
	refcount_create(&rrl->rr_linked_rcount);
}
/*
 * Tear down what rrw_init() set up.  The lock is asserted to have no
 * writer at this point.
 */
void
rrw_destroy(rrwlock_t *rrl)
{
	cv_destroy(&rrl->rr_cv);
	mutex_destroy(&rrl->rr_lock);
	ASSERT(rrl->rr_writer == NULL);
	refcount_destroy(&rrl->rr_linked_rcount);
	refcount_destroy(&rrl->rr_anon_rcount);
}
/*
 * Acquire 'rrl' for reading on behalf of 'tag'.
 *
 * The caller sleeps while a writer holds the lock.  It also sleeps while a
 * writer is waiting, unless anonymous readers are still present or this
 * thread already has a node for the lock (a re-entrant read).
 */
static void
rrw_enter_read(rrwlock_t *rrl, void *tag)
{
	mutex_enter(&rrl->rr_lock);
	ASSERT(rrl->rr_writer != curthread);
	ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0);

	for (;;) {
		if (rrl->rr_writer == NULL) {
			/* Nobody is asking for the write lock. */
			if (!rrl->rr_writer_wanted)
				break;
			/* We cannot rule out being re-entrant yet. */
			if (!refcount_is_zero(&rrl->rr_anon_rcount))
				break;
			/* We are known to be re-entrant. */
			if (rrn_find(rrl) != NULL)
				break;
		}
		cv_wait(&rrl->rr_cv, &rrl->rr_lock);
	}

	if (!rrl->rr_writer_wanted) {
		(void) refcount_add(&rrl->rr_anon_rcount, tag);
	} else {
		/* may or may not be a re-entrant enter */
		rrn_add(rrl);
		(void) refcount_add(&rrl->rr_linked_rcount, tag);
	}

	ASSERT(rrl->rr_writer == NULL);
	mutex_exit(&rrl->rr_lock);
}
/*
 * Acquire 'rrl' for writing.  The caller sleeps until there are no
 * readers of either kind and no other writer, setting rr_writer_wanted
 * each time it has to wait.
 */
static void
rrw_enter_write(rrwlock_t *rrl)
{
	mutex_enter(&rrl->rr_lock);
	ASSERT(rrl->rr_writer != curthread);

	for (;;) {
		if (refcount_count(&rrl->rr_anon_rcount) <= 0 &&
		    refcount_count(&rrl->rr_linked_rcount) <= 0 &&
		    rrl->rr_writer == NULL)
			break;

		rrl->rr_writer_wanted = B_TRUE;
		cv_wait(&rrl->rr_cv, &rrl->rr_lock);
	}

	rrl->rr_writer = curthread;
	rrl->rr_writer_wanted = B_FALSE;
	mutex_exit(&rrl->rr_lock);
}
/*
 * Acquire 'rrl' in the requested mode.  RW_READER takes a read hold
 * tagged with 'tag'; any other value takes the write lock, and 'tag' is
 * not used.
 */
void
rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag)
{
	if (rw != RW_READER) {
		rrw_enter_write(rrl);
		return;
	}
	rrw_enter_read(rrl, tag);
}
/*
 * Release one hold on 'rrl'.
 *
 * With no writer recorded, the caller is a reader.  If the thread has a
 * node for this lock, the node is removed and the linked count is dropped;
 * otherwise the anonymous count is dropped.  With a writer recorded, the
 * write lock is released.  Waiters are woken when the affected reader
 * count reaches zero or when the writer leaves.
 */
void
rrw_exit(rrwlock_t *rrl, void *tag)
{
	mutex_enter(&rrl->rr_lock);
	ASSERT(!refcount_is_zero(&rrl->rr_anon_rcount) ||
	    !refcount_is_zero(&rrl->rr_linked_rcount) ||
	    rrl->rr_writer != NULL);

	if (rrl->rr_writer != NULL) {
		ASSERT(rrl->rr_writer == curthread);
		ASSERT(refcount_is_zero(&rrl->rr_anon_rcount) &&
		    refcount_is_zero(&rrl->rr_linked_rcount));
		rrl->rr_writer = NULL;
		cv_broadcast(&rrl->rr_cv);
	} else if (rrn_find_and_remove(rrl)) {
		if (refcount_remove(&rrl->rr_linked_rcount, tag) == 0)
			cv_broadcast(&rrl->rr_cv);
	} else {
		if (refcount_remove(&rrl->rr_anon_rcount, tag) == 0)
			cv_broadcast(&rrl->rr_cv);
	}

	mutex_exit(&rrl->rr_lock);
}
/*
 * Report whether 'rrl' is held in mode 'rw'.  For RW_WRITER the answer is
 * whether the calling thread is the writer.  For any other mode it is
 * whether any reader at all holds the lock, not necessarily the caller.
 */
boolean_t
rrw_held(rrwlock_t *rrl, krw_t rw)
{
	boolean_t held;

	mutex_enter(&rrl->rr_lock);
	if (rw != RW_WRITER) {
		held = !(refcount_is_zero(&rrl->rr_anon_rcount) &&
		    refcount_is_zero(&rrl->rr_linked_rcount));
	} else {
		held = (rrl->rr_writer == curthread);
	}
	mutex_exit(&rrl->rr_lock);

	return (held);
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+968
View File
@@ -0,0 +1,968 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "@(#)zfs_dir.c 1.25 08/04/27 SMI"
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mode.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/pathname.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/sunddi.h>
#include <sys/random.h>
#include <sys/policy.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_acl.h>
#include <sys/fs/zfs.h>
#include "fs/fs_subr.h"
#include <sys/zap.h>
#include <sys/dmu.h>
#include <sys/atomic.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/dnlc.h>
#include <sys/extdirent.h>
/*
 * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups
 * of names after deciding which is the appropriate lookup interface.
 *
 *	zfsvfs	- file system; a non-zero z_norm selects the normalizing lookup
 *	dzp	- directory whose zap object is searched
 *	name	- entry name to look up
 *	exact	- request MT_EXACT rather than MT_FIRST (normalizing case only)
 *	update	- on ENOENT, record a negative entry for 'name' in the DNLC
 *	deflags	- if non-NULL, set on success in the normalizing case to
 *		  ED_CASE_CONFLICT or 0
 *	rpnp	- if non-NULL, its buffer is handed to zap_lookup_norm()
 *	zoid	- on success, receives the object number of the entry
 *
 * Returns 0 on success or the error from the zap lookup.  On failure
 * *zoid is left untouched by this function.
 */
static int
zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact,
    boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid)
{
	int error;

	if (zfsvfs->z_norm) {
		matchtype_t mt = MT_FIRST;
		boolean_t conflict = B_FALSE;
		size_t bufsz = 0;
		char *buf = NULL;

		if (rpnp) {
			buf = rpnp->pn_buf;
			bufsz = rpnp->pn_bufsize;
		}
		if (exact)
			mt = MT_EXACT;
		/*
		 * In the non-mixed case we only expect there would ever
		 * be one match, but we need to use the normalizing lookup.
		 */
		error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
		    zoid, mt, buf, bufsz, &conflict);
		if (!error && deflags)
			*deflags = conflict ? ED_CASE_CONFLICT : 0;
	} else {
		error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
	}

	/*
	 * Only decode the entry when the lookup produced one.  On failure
	 * the caller's variable may never have been written, so reading it
	 * here would consume an uninitialized value.
	 */
	if (error == 0)
		*zoid = ZFS_DIRENT_OBJ(*zoid);

	if (error == ENOENT && update)
		dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE);

	return (error);
}
/*
* Lock a directory entry. A dirlock on <dzp, name> protects that name
* in dzp's directory zap object. As long as you hold a dirlock, you can
* assume two things: (1) dzp cannot be reaped, and (2) no other thread
* can change the zap entry for (i.e. link or unlink) this name.
*
* Input arguments:
* dzp - znode for directory
* name - name of entry to lock
* flag - ZNEW: if the entry already exists, fail with EEXIST.
* ZEXISTS: if the entry does not exist, fail with ENOENT.
* ZSHARED: allow concurrent access with other ZSHARED callers.
* ZXATTR: we want dzp's xattr directory
* ZCILOOK: On a mixed sensitivity file system,
* this lookup should be case-insensitive.
* ZCIEXACT: On a purely case-insensitive file system,
* this lookup should be case-sensitive.
* ZRENAMING: we are locking for renaming, force narrow locks
*
* Output arguments:
* zpp - pointer to the znode for the entry (NULL if there isn't one)
* dlpp - pointer to the dirlock for this entry (NULL on error)
* direntflags - (case-insensitive lookup only)
* flags if multiple case-sensitive matches exist in directory
* realpnp - (case-insensitive lookup only)
* actual name matched within the directory
*
* Return value: 0 on success or errno on failure.
*
* NOTE: Always checks for, and rejects, '.' and '..'.
* NOTE: For case-insensitive file systems we take wide locks (see below),
* but return znode pointers to a single match.
*/
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
int flag, int *direntflags, pathname_t *realpnp)
{
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
zfs_dirlock_t *dl;
boolean_t update;
boolean_t exact;
uint64_t zoid;
vnode_t *vp = NULL;
int error = 0;
int cmpflags;
/* Both outputs start out NULL so every error return leaves them that way. */
*zpp = NULL;
*dlpp = NULL;
/*
* Verify that we are not trying to lock '.', '..', or '.zfs'
*/
if (name[0] == '.' &&
(name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) ||
zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)
return (EEXIST);
/*
* Case sensitivity and normalization preferences are set when
* the file system is created. These are stored in the
* zfsvfs->z_case and zfsvfs->z_norm fields. These choices
* affect what vnodes can be cached in the DNLC, how we
* perform zap lookups, and the "width" of our dirlocks.
*
* A normal dirlock locks a single name. Note that with
* normalization a name can be composed multiple ways, but
* when normalized, these names all compare equal. A wide
* dirlock locks multiple names. We need these when the file
* system is supporting mixed-mode access. It is sometimes
* necessary to lock all case permutations of file name at
* once so that simultaneous case-insensitive/case-sensitive
* behaves as rationally as possible.
*/
/*
* Decide if exact matches should be requested when performing
* a zap lookup on file systems supporting case-insensitive
* access.
*/
exact =
((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));
/*
* Only look in or update the DNLC if we are looking for the
* name on a file system that does not require normalization
* or case folding. We can also look there if we happen to be
* on a non-normalizing, mixed sensitivity file system IF we
* are looking for the exact name.
*
* Maybe can add TO-UPPERed version of name to dnlc in ci-only
* case for performance improvement?
*/
update = !zfsvfs->z_norm ||
((zfsvfs->z_case == ZFS_CASE_MIXED) &&
!(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
/*
* ZRENAMING indicates we are in a situation where we should
* take narrow locks regardless of the file system's
* preferences for normalizing and case folding. This will
* prevent us deadlocking trying to grab the same wide lock
* twice if the two names happen to be case-insensitive
* matches.
*/
if (flag & ZRENAMING)
cmpflags = 0;
else
cmpflags = zfsvfs->z_norm;
/*
* Wait until there are no locks on this name.
*/
/*
* The read hold on z_name_lock taken here is dropped on the two early
* returns inside the loop. On every later path it is dropped by
* zfs_dirent_unlock(), whether called from this function on error or by
* the caller after a successful return.
*/
rw_enter(&dzp->z_name_lock, RW_READER);
mutex_enter(&dzp->z_lock);
for (;;) {
if (dzp->z_unlinked) {
mutex_exit(&dzp->z_lock);
rw_exit(&dzp->z_name_lock);
return (ENOENT);
}
/*
* Stop at the first existing dirlock whose name compares equal, or as
* soon as u8_strcmp() reports a failure through 'error'.
*/
for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
U8_UNICODE_LATEST, &error) == 0) || error != 0)
break;
}
/* A comparison failure is reported to the caller as ENOENT. */
if (error != 0) {
mutex_exit(&dzp->z_lock);
rw_exit(&dzp->z_name_lock);
return (ENOENT);
}
if (dl == NULL) {
/*
* Allocate a new dirlock and add it to the list.
*/
dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
/*
* dl_name borrows the caller's buffer for now. dl_namesize == 0 marks
* it as not owned: zfs_dirent_unlock() only frees dl_name when
* dl_namesize is non-zero.
*/
dl->dl_name = name;
dl->dl_sharecnt = 0;
dl->dl_namesize = 0;
dl->dl_dzp = dzp;
dl->dl_next = dzp->z_dirlocks;
dzp->z_dirlocks = dl;
break;
}
/* A shared request may join a lock that is already held shared. */
if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
break;
/*
* Sleep until woken on dl_cv (zfs_dirent_unlock() broadcasts on it),
* then rescan the list from the top.
*/
cv_wait(&dl->dl_cv, &dzp->z_lock);
}
if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
/*
* We're the second shared reference to dl. Make a copy of
* dl_name in case the first thread goes away before we do.
* Note that we initialize the new name before storing its
* pointer into dl_name, because the first thread may load
* dl->dl_name at any time. He'll either see the old value,
* which is his, or the new shared copy; either is OK.
*/
dl->dl_namesize = strlen(dl->dl_name) + 1;
name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
bcopy(dl->dl_name, name, dl->dl_namesize);
dl->dl_name = name;
}
mutex_exit(&dzp->z_lock);
/*
* We have a dirlock on the name. (Note that it is the dirlock,
* not the dzp's z_lock, that protects the name in the zap object.)
* See if there's an object by this name; if so, put a hold on it.
*/
if (flag & ZXATTR) {
/* The xattr directory is found through zp_xattr, not by name. */
zoid = dzp->z_phys->zp_xattr;
error = (zoid == 0 ? ENOENT : 0);
} else {
/* When 'update' is false the DNLC is skipped and vp stays NULL. */
if (update)
vp = dnlc_lookup(ZTOV(dzp), name);
if (vp == DNLC_NO_VNODE) {
/* Negative DNLC entry: release it and report ENOENT. */
VN_RELE(vp);
error = ENOENT;
} else if (vp) {
if (flag & ZNEW) {
zfs_dirent_unlock(dl);
VN_RELE(vp);
return (EEXIST);
}
/*
* DNLC hit: vp is not released on this path, so the reference
* obtained from dnlc_lookup() goes to the caller through *zpp.
*/
*dlpp = dl;
*zpp = VTOZ(vp);
return (0);
} else {
error = zfs_match_find(zfsvfs, dzp, name, exact,
update, direntflags, realpnp, &zoid);
}
}
if (error) {
/*
* ENOENT without ZEXISTS is not a failure: control falls through
* and the caller gets the dirlock with *zpp still NULL.
*/
if (error != ENOENT || (flag & ZEXISTS)) {
zfs_dirent_unlock(dl);
return (error);
}
} else {
if (flag & ZNEW) {
zfs_dirent_unlock(dl);
return (EEXIST);
}
error = zfs_zget(zfsvfs, zoid, zpp);
if (error) {
zfs_dirent_unlock(dl);
return (error);
}
if (!(flag & ZXATTR) && update)
dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));
}
*dlpp = dl;
return (0);
}
/*
 * Release a dirlock obtained from zfs_dirent_lock().
 *
 * The read hold on the directory's z_name_lock is always dropped.  If
 * other shared holders remain, only the share count is decremented.
 * Otherwise the dirlock is unlinked from the directory's list, waiters are
 * woken, and the dirlock is freed along with its private copy of the name
 * if it has one.
 */
void
zfs_dirent_unlock(zfs_dirlock_t *dl)
{
	znode_t *dzp = dl->dl_dzp;
	zfs_dirlock_t **linkp;

	mutex_enter(&dzp->z_lock);
	rw_exit(&dzp->z_name_lock);

	if (dl->dl_sharecnt > 1) {
		dl->dl_sharecnt--;
		mutex_exit(&dzp->z_lock);
		return;
	}

	/* Find the pointer that refers to dl and splice dl out. */
	for (linkp = &dzp->z_dirlocks; *linkp != dl;
	    linkp = &(*linkp)->dl_next)
		continue;
	*linkp = dl->dl_next;

	cv_broadcast(&dl->dl_cv);
	mutex_exit(&dzp->z_lock);

	/* A non-zero dl_namesize means dl_name was allocated for sharing. */
	if (dl->dl_namesize != 0)
		kmem_free(dl->dl_name, dl->dl_namesize);
	cv_destroy(&dl->dl_cv);
	kmem_free(dl, sizeof (*dl));
}
/*
 * Look up 'name' in directory 'dzp' and return its vnode through *vpp.
 *
 * "" and "." resolve to the directory itself, and ".." to its parent (or
 * to the "snapshot" control directory when dzp is the root of a snapshot
 * mounted under .zfs).  If the directory has a control directory, '.zfs'
 * resolves to it.  None of these names have stored directory entries.
 * Every other name goes through zfs_dirent_lock().
 *
 * For FIGNORECASE lookups of the special names, 'name' itself is copied
 * into rpnp on success.  For ordinary names, rpnp is passed to
 * zfs_dirent_lock() and not written here.
 *
 * Returns 0 on success or an errno value.
 */
int
zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
    int *deflg, pathname_t *rpnp)
{
	zfs_dirlock_t *dl;
	znode_t *zp;
	int error = 0;

	if (name[0] == '\0' || strcmp(name, ".") == 0) {
		*vpp = ZTOV(dzp);
		VN_HOLD(*vpp);
	} else if (strcmp(name, "..") == 0) {
		zfsvfs_t *zfsvfs = dzp->z_zfsvfs;

		/*
		 * If we are a snapshot mounted under .zfs, return
		 * the vp for the snapshot directory.
		 */
		if (dzp->z_phys->zp_parent == dzp->z_id &&
		    zfsvfs->z_parent != zfsvfs) {
			return (zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
			    "snapshot", vpp, NULL, 0, NULL, kcred,
			    NULL, NULL, NULL));
		}

		rw_enter(&dzp->z_parent_lock, RW_READER);
		error = zfs_zget(zfsvfs, dzp->z_phys->zp_parent, &zp);
		if (error == 0)
			*vpp = ZTOV(zp);
		rw_exit(&dzp->z_parent_lock);
	} else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
		*vpp = zfsctl_root(dzp);
	} else {
		int zf = ZEXISTS | ZSHARED;

		if (flags & FIGNORECASE)
			zf |= ZCILOOK;

		error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp);
		if (error == 0) {
			*vpp = ZTOV(zp);
			zfs_dirent_unlock(dl);
			dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
		}
		/* rpnp was handed to zfs_dirent_lock(); do not copy here. */
		return (error);
	}

	if (!error && rpnp && (flags & FIGNORECASE))
		(void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize);

	return (error);
}
/*
 * Format 'x' as lowercase hexadecimal without leading zeros ("0" for
 * zero).  The digits are written right-aligned into the 17-byte buffer
 * 'namebuf', terminated at namebuf[16].  The returned pointer is the first
 * digit, which lies inside namebuf but is generally not its start.
 */
static char *
zfs_unlinked_hexname(char namebuf[17], uint64_t x)
{
	static const char hexdigits[] = "0123456789abcdef";
	char *cursor = namebuf + 16;

	*cursor = '\0';
	for (;;) {
		cursor--;
		*cursor = hexdigits[x % 16];
		x /= 16;
		if (x == 0)
			break;
	}

	return (cursor);
}
/*
* unlinked Set (formerly known as the "delete queue") Error Handling
*
* When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
* don't specify the name of the entry that we will be manipulating. We
* also fib and say that we won't be adding any new entries to the
* unlinked set, even though we might (this is to lower the minimum file
* size that can be deleted in a full filesystem). So on the small
* chance that the nlink list is using a fat zap (ie. has more than
* 2000 entries), we *may* not pre-read a block that's needed.
* Therefore it is remotely possible for some of the assertions
* regarding the unlinked set below to fail due to i/o error. On a
* nondebug system, this will result in the space being leaked.
*/
/*
 * Add 'zp' to the file system's unlinked set as part of transaction 'tx'.
 * The zap entry is keyed by the hexadecimal form of zp->z_id and stores
 * that same object number.  The znode must already be marked unlinked and
 * have a link count of zero.
 */
void
zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	char hexbuf[17];
	char *key;
	int error;

	ASSERT(zp->z_unlinked);
	ASSERT3U(zp->z_phys->zp_links, ==, 0);

	key = zfs_unlinked_hexname(hexbuf, zp->z_id);
	error = zap_add(zfsvfs->z_os, zfsvfs->z_unlinkedobj, key, 8, 1,
	    &zp->z_id, tx);
	ASSERT3U(error, ==, 0);
}
/*
 * Clean up any znodes that had no links when we either crashed or
 * (force) umounted the file system.
 *
 * Each entry of the unlinked set is pulled back into core, marked
 * z_unlinked and released.  Entries whose object info or znode cannot be
 * obtained are skipped.
 */
void
zfs_unlinked_drain(zfsvfs_t *zfsvfs)
{
	zap_cursor_t zc;
	zap_attribute_t zap;
	dmu_object_info_t doi;
	znode_t *zp;

	/*
	 * Iterate over the contents of the unlinked set.
	 */
	for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
	    zap_cursor_retrieve(&zc, &zap) == 0;
	    zap_cursor_advance(&zc)) {

		/*
		 * See what kind of object we have in list
		 */
		if (dmu_object_info(zfsvfs->z_os,
		    zap.za_first_integer, &doi) != 0)
			continue;

		ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
		    (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));

		/*
		 * We need to re-mark these list entries for deletion,
		 * so we pull them back into core and set zp->z_unlinked.
		 *
		 * We may pick up znodes that are already marked for
		 * deletion.  This could happen during the purge of an
		 * extended attribute directory.  All we need to do is
		 * skip over them, since they are already in the system
		 * marked z_unlinked.
		 */
		if (zfs_zget(zfsvfs, zap.za_first_integer, &zp) != 0)
			continue;

		zp->z_unlinked = B_TRUE;
		VN_RELE(ZTOV(zp));
	}
	zap_cursor_fini(&zc);
}
/*
 * Delete the entire contents of a directory.  Return a count
 * of the number of entries that could not be deleted.  If the directory
 * walk itself ends with anything other than ENOENT, the count is bumped
 * so that it is at least one and the directory stays in the unlinked set.
 *
 * NOTE: this function assumes that the directory is inactive,
 *	so there is no need to lock its entries before deletion.
 *	Also, it assumes the directory contents is *only* regular
 *	files.
 */
static int
zfs_purgedir(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zap_cursor_t cursor;
	zap_attribute_t entry;
	zfs_dirlock_t fake_dl;
	znode_t *xzp;
	dmu_tx_t *tx;
	int failed = 0;
	int error;

	for (zap_cursor_init(&cursor, zfsvfs->z_os, dzp->z_id);
	    (error = zap_cursor_retrieve(&cursor, &entry)) == 0;
	    zap_cursor_advance(&cursor)) {
		if (zfs_zget(zfsvfs,
		    ZFS_DIRENT_OBJ(entry.za_first_integer), &xzp) != 0) {
			failed++;
			continue;
		}

		ASSERT((ZTOV(xzp)->v_type == VREG) ||
		    (ZTOV(xzp)->v_type == VLNK));

		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_bonus(tx, dzp->z_id);
		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, entry.za_name);
		dmu_tx_hold_bonus(tx, xzp->z_id);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
		if (dmu_tx_assign(tx, TXG_WAIT) != 0) {
			dmu_tx_abort(tx);
			VN_RELE(ZTOV(xzp));
			failed++;
			continue;
		}

		/*
		 * No real dirlock is taken; a zeroed stand-in carrying the
		 * directory and the entry name is passed instead.
		 */
		bzero(&fake_dl, sizeof (fake_dl));
		fake_dl.dl_dzp = dzp;
		fake_dl.dl_name = entry.za_name;

		if (zfs_link_destroy(&fake_dl, xzp, tx, 0, NULL) != 0)
			failed++;

		dmu_tx_commit(tx);
		VN_RELE(ZTOV(xzp));
	}
	zap_cursor_fini(&cursor);

	/* 'error' now holds the result of the last cursor retrieve. */
	if (error != ENOENT)
		failed++;

	return (failed);
}
/*
 * Destroy a znode that has no links and no vnode references.
 *
 * An extended-attribute directory is emptied first.  If the file has an
 * xattr directory of its own, that directory is marked unlinked and added
 * to the unlinked set in the same transaction.  The znode is then removed
 * from the unlinked set and deleted.
 *
 * If the xattr directory cannot be fully purged, or the transaction
 * cannot be assigned, the in-core znode is freed and the object is left
 * on the unlinked set.
 */
void
zfs_rmnode(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	objset_t *os = zfsvfs->z_os;
	znode_t *xattr_zp = NULL;
	char hexbuf[17];
	dmu_tx_t *tx;
	uint64_t acl_obj;
	int error;

	ASSERT(ZTOV(zp)->v_count == 0);
	ASSERT(zp->z_phys->zp_links == 0);

	/*
	 * If this is an attribute directory, purge its contents.
	 */
	if (ZTOV(zp)->v_type == VDIR && (zp->z_phys->zp_flags & ZFS_XATTR) &&
	    zfs_purgedir(zp) != 0) {
		/*
		 * Not enough space to delete some xattrs.
		 * Leave it on the unlinked set.
		 */
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		return;
	}

	/*
	 * If the file has extended attributes, we're going to unlink
	 * the xattr dir.
	 */
	if (zp->z_phys->zp_xattr) {
		error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xattr_zp);
		ASSERT(error == 0);
	}

	acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;

	/*
	 * Set up the transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	if (xattr_zp) {
		dmu_tx_hold_bonus(tx, xattr_zp->z_id);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error == 0) {
		if (xattr_zp) {
			dmu_buf_will_dirty(xattr_zp->z_dbuf, tx);
			mutex_enter(&xattr_zp->z_lock);
			/* mark the xattr dir for deletion */
			xattr_zp->z_unlinked = B_TRUE;
			/* no more links to it */
			xattr_zp->z_phys->zp_links = 0;
			mutex_exit(&xattr_zp->z_lock);
			zfs_unlinked_add(xattr_zp, tx);
		}

		/* Remove this znode from the unlinked set */
		error = zap_remove(os, zfsvfs->z_unlinkedobj,
		    zfs_unlinked_hexname(hexbuf, zp->z_id), tx);
		ASSERT3U(error, ==, 0);

		zfs_znode_delete(zp, tx);
		dmu_tx_commit(tx);
	} else {
		/*
		 * Not enough space to delete the file.  Leave it in the
		 * unlinked set, leaking it until the fs is remounted (at
		 * which point we'll call zfs_unlinked_drain() to process it).
		 */
		dmu_tx_abort(tx);
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
	}

	if (xattr_zp)
		VN_RELE(ZTOV(xattr_zp));
}
/*
 * Build the 64-bit value stored in a directory entry for 'zp'.  It is the
 * object number, and on file systems at ZPL_VERSION_DIRENT_TYPE or later
 * it also carries IFTODT(zp_mode) shifted into the top four bits.
 */
static uint64_t
zfs_dirent(znode_t *zp)
{
	if (zp->z_zfsvfs->z_version < ZPL_VERSION_DIRENT_TYPE)
		return (zp->z_id);

	return (zp->z_id | (IFTODT((zp)->z_phys->zp_mode) << 60));
}
/*
 * Link zp into the directory entry locked by dl.
 *
 * Unless ZRENAMING is set, zp's link count is incremented, and ENOENT is
 * returned if zp has already been unlinked.  zp's parent pointer is set to
 * the directory.  The directory's size grows by one, as does its link
 * count when zp is itself a directory.  The entry is then added to the
 * directory's zap object and to the DNLC.
 *
 * Returns 0, or ENOENT as described above.
 */
int
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
{
	znode_t *dzp = dl->dl_dzp;
	vnode_t *vp = ZTOV(zp);
	int child_is_dir = (vp->v_type == VDIR);
	uint64_t dirent;
	int error;

	/* Update the child under its own lock. */
	dmu_buf_will_dirty(zp->z_dbuf, tx);
	mutex_enter(&zp->z_lock);
	if ((flag & ZRENAMING) == 0) {
		if (zp->z_unlinked) {	/* no new links to unlinked zp */
			ASSERT(!(flag & (ZNEW | ZEXISTS)));
			mutex_exit(&zp->z_lock);
			return (ENOENT);
		}
		zp->z_phys->zp_links += 1;
	}
	zp->z_phys->zp_parent = dzp->z_id;	/* dzp is now zp's parent */
	if ((flag & ZNEW) == 0)
		zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
	mutex_exit(&zp->z_lock);

	/* Update the directory under its lock. */
	dmu_buf_will_dirty(dzp->z_dbuf, tx);
	mutex_enter(&dzp->z_lock);
	dzp->z_phys->zp_size += 1;		/* one dirent added */
	if (child_is_dir)
		dzp->z_phys->zp_links += 1;	/* ".." link from zp */
	zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
	mutex_exit(&dzp->z_lock);

	dirent = zfs_dirent(zp);
	error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name,
	    8, 1, &dirent, tx);
	ASSERT(error == 0);

	dnlc_update(ZTOV(dzp), dl->dl_name, vp);

	return (0);
}
/*
 * Unlink zp from dl, and mark zp for deletion if this was the last link.
 * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
 * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
 * If it's non-NULL, we use it to indicate whether the znode needs deletion,
 * and it's the caller's job to do it.
 *
 * With ZRENAMING set, zp's link count and the mount-point and emptiness
 * checks are skipped.  Only the directory's bookkeeping and the zap entry
 * are updated.
 */
int
zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
    boolean_t *unlinkedp)
{
	znode_t *dzp = dl->dl_dzp;
	vnode_t *vp = ZTOV(zp);
	int zp_is_dir = (vp->v_type == VDIR);
	boolean_t last_link = B_FALSE;
	int error;

	dnlc_remove(ZTOV(dzp), dl->dl_name);

	if ((flag & ZRENAMING) == 0) {
		dmu_buf_will_dirty(zp->z_dbuf, tx);

		if (vn_vfswlock(vp))		/* prevent new mounts on zp */
			return (EBUSY);

		if (vn_ismntpt(vp)) {		/* don't remove mount point */
			vn_vfsunlock(vp);
			return (EBUSY);
		}

		mutex_enter(&zp->z_lock);
		if (zp_is_dir && !zfs_dirempty(zp)) {	/* dir not empty */
			mutex_exit(&zp->z_lock);
			vn_vfsunlock(vp);
			return (EEXIST);
		}

		/* Repair an impossible link count before decrementing. */
		if (zp->z_phys->zp_links <= zp_is_dir) {
			zfs_panic_recover("zfs: link count on %s is %u, "
			    "should be at least %u",
			    zp->z_vnode->v_path ? zp->z_vnode->v_path :
			    "<unknown>", (int)zp->z_phys->zp_links,
			    zp_is_dir + 1);
			zp->z_phys->zp_links = zp_is_dir + 1;
		}

		zp->z_phys->zp_links--;
		if (zp->z_phys->zp_links != zp_is_dir) {
			zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
		} else {
			/* That was the last link. */
			zp->z_unlinked = B_TRUE;
			zp->z_phys->zp_links = 0;
			last_link = B_TRUE;
		}
		mutex_exit(&zp->z_lock);
		vn_vfsunlock(vp);
	}

	dmu_buf_will_dirty(dzp->z_dbuf, tx);
	mutex_enter(&dzp->z_lock);
	dzp->z_phys->zp_size--;			/* one dirent removed */
	if (zp_is_dir)
		dzp->z_phys->zp_links -= 1;	/* ".." link from zp */
	zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
	mutex_exit(&dzp->z_lock);

	if (!zp->z_zfsvfs->z_norm) {
		error = zap_remove(zp->z_zfsvfs->z_os,
		    dzp->z_id, dl->dl_name, tx);
	} else if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) &&
	    (flag & ZCIEXACT)) ||
	    ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) &&
	    !(flag & ZCILOOK))) {
		error = zap_remove_norm(zp->z_zfsvfs->z_os,
		    dzp->z_id, dl->dl_name, MT_EXACT, tx);
	} else {
		error = zap_remove_norm(zp->z_zfsvfs->z_os,
		    dzp->z_id, dl->dl_name, MT_FIRST, tx);
	}
	ASSERT(error == 0);

	if (unlinkedp == NULL) {
		if (last_link)
			zfs_unlinked_add(zp, tx);
	} else {
		*unlinkedp = last_link;
	}

	return (0);
}
/*
 * Indicate whether the directory is empty.  Works with or without z_lock
 * held, but can only be considered a hint in the latter case.  Returns true
 * if only "." and ".." remain and there's no work in progress.
 */
boolean_t
zfs_dirempty(znode_t *dzp)
{
	int only_dot_entries = (dzp->z_phys->zp_size == 2);

	/* Any outstanding dirlock means work is still in progress. */
	return (only_dot_entries && dzp->z_dirlocks == 0);
}
/*
 * Create the extended attribute directory for 'zp' and return its vnode
 * through *xvpp.
 *
 * The caller must pass the ACE_WRITE_NAMED_ATTRS access check.  The new
 * directory is created with zfs_mknode(), recorded in zp's zp_xattr
 * field, and logged as TX_MKXATTR.
 *
 * Returns 0 on success.  On failure *xvpp is NULL and the return value is
 * the error from the access check or from dmu_tx_assign().
 */
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zfs_fuid_info_t *fuidp = NULL;
	znode_t *xzp;
	dmu_tx_t *tx;
	int error;

	*xvpp = NULL;

	error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr);
	if (error)
		return (error);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);

	/* Ephemeral ids need holds covering the FUID table as well. */
	if (IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))) {
		if (zfsvfs->z_fuid_obj != 0) {
			dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
			dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
		} else {
			dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
			dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
		}
	}

	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
			dmu_tx_wait(tx);
		dmu_tx_abort(tx);
		return (error);
	}

	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, 0, NULL, &fuidp);
	ASSERT(xzp->z_phys->zp_parent == zp->z_id);

	/* Point the file at its new attribute directory. */
	dmu_buf_will_dirty(zp->z_dbuf, tx);
	zp->z_phys->zp_xattr = xzp->z_id;

	(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
	    xzp, "", NULL, fuidp, vap);
	if (fuidp)
		zfs_fuid_info_free(fuidp);
	dmu_tx_commit(tx);

	*xvpp = ZTOV(xzp);

	return (0);
}
/*
 * Return the vnode of the extended attribute directory for zp.
 * ** If the directory does not already exist, it is created **
 * (only when CREATE_XATTR_DIR is set in flags).
 *
 *	IN:	zp	- znode to obtain attribute directory from
 *		cr	- credentials of caller
 *		flags	- flags from the VOP_LOOKUP call
 *
 *	OUT:	xvpp	- vnode of the extended attribute directory
 *
 *	RETURN:	0 on success
 *		error number on failure
 */
int
zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zfs_dirlock_t *dl;
	znode_t *xzp;
	vattr_t va;
	int error;

	for (;;) {
		error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL);
		if (error)
			return (error);

		/* The directory already exists: hand it back. */
		if (xzp != NULL) {
			*xvpp = ZTOV(xzp);
			zfs_dirent_unlock(dl);
			return (0);
		}

		ASSERT(zp->z_phys->zp_xattr == 0);

		if ((flags & CREATE_XATTR_DIR) == 0) {
			zfs_dirent_unlock(dl);
			return (ENOENT);
		}

		if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
			zfs_dirent_unlock(dl);
			return (EROFS);
		}

		/*
		 * The ability to 'create' files in an attribute
		 * directory comes from the write_xattr permission on the
		 * base file.
		 *
		 * The ability to 'search' an attribute directory requires
		 * read_xattr permission on the base file.
		 *
		 * Once in a directory the ability to read/write attributes
		 * is controlled by the permissions on the attribute file.
		 */
		va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
		va.va_type = VDIR;
		va.va_mode = S_IFDIR | S_ISVTX | 0777;
		zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);

		error = zfs_make_xattrdir(zp, &va, xvpp, cr);
		zfs_dirent_unlock(dl);

		if (error != ERESTART || zfsvfs->z_assign != TXG_NOWAIT)
			return (error);

		/* NB: we already did dmu_tx_wait() if necessary; retry. */
	}
}
/*
 * Decide whether it is okay to remove within a sticky directory.
 *
 * In sticky directories, write access is not sufficient;
 * you can remove entries from a directory only if:
 *
 *	you own the directory,
 *	you own the entry,
 *	the entry is a plain file and you have write access,
 *	or you are privileged (checked in secpolicy...).
 *
 * Returns 0 if remove access is granted. Access is always granted
 * during ZIL replay and when the directory is not sticky.
 */
int
zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
{
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
	uid_t dir_owner;
	uid_t file_owner;
	uid_t caller;

	if (zfsvfs->z_assign >= TXG_INITIAL)	/* ZIL replay */
		return (0);

	if ((zdp->z_phys->zp_mode & S_ISVTX) == 0)
		return (0);

	dir_owner = zfs_fuid_map_id(zfsvfs, zdp->z_phys->zp_uid, cr,
	    ZFS_OWNER);
	file_owner = zfs_fuid_map_id(zfsvfs, zp->z_phys->zp_uid, cr,
	    ZFS_OWNER);
	caller = crgetuid(cr);

	if (caller == dir_owner || caller == file_owner)
		return (0);

	if (ZTOV(zp)->v_type == VREG &&
	    zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)
		return (0);

	return (secpolicy_vnode_remove(cr));
}
+688
View File
@@ -0,0 +1,688 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "@(#)zfs_fuid.c 1.5 08/01/31 SMI"
#include <sys/zfs_context.h>
#include <sys/sunddi.h>
#include <sys/dmu.h>
#include <sys/avl.h>
#include <sys/zap.h>
#include <sys/refcount.h>
#include <sys/nvpair.h>
#ifdef _KERNEL
#include <sys/kidmap.h>
#include <sys/sid.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
#endif
#include <sys/zfs_fuid.h>
/*
* FUID Domain table(s).
*
* The FUID table is stored as a packed nvlist of an array
* of nvlists which contain an index, domain string and offset
*
* During file system initialization the nvlist(s) are read and
* two AVL trees are created. One tree is keyed by the index number
* and the other by the domain string. Nodes are never removed from
* trees, but new entries may be added. If a new entry is added then the
* on-disk packed nvlist will also be updated.
*/
/*
 * Names of the nvpairs in the packed FUID table: FUID_NVP_ARRAY is the
 * array of per-domain nvlists, each of which carries a FUID_IDX,
 * FUID_DOMAIN and FUID_OFFSET entry.
 */
#define FUID_IDX "fuid_idx"
#define FUID_DOMAIN "fuid_domain"
#define FUID_OFFSET "fuid_offset"
#define FUID_NVP_ARRAY "fuid_nvlist"
/*
 * In-core FUID domain entry. Each entry is linked into two AVL trees at
 * once: one ordered by domain string and one ordered by index.
 */
typedef struct fuid_domain {
avl_node_t f_domnode; /* linkage in the domain-string tree */
avl_node_t f_idxnode; /* linkage in the index tree */
ksiddomain_t *f_ksid; /* domain handle; kd_name is the string */
uint64_t f_idx; /* index of this domain in the table */
} fuid_domain_t;
/*
 * AVL comparator ordering fuid_domain_t nodes by their f_idx value.
 * Returns -1, 0 or 1.
 */
static int
idx_compare(const void *arg1, const void *arg2)
{
	const fuid_domain_t *lhs = arg1;
	const fuid_domain_t *rhs = arg2;

	if (lhs->f_idx == rhs->f_idx)
		return (0);
	return (lhs->f_idx < rhs->f_idx ? -1 : 1);
}
/*
 * AVL comparator ordering fuid_domain_t nodes by domain string.
 * The strcmp() result is normalized to -1, 0 or 1.
 */
static int
domain_compare(const void *arg1, const void *arg2)
{
	const fuid_domain_t *lhs = arg1;
	const fuid_domain_t *rhs = arg2;
	int cmp = strcmp(lhs->f_ksid->kd_name, rhs->f_ksid->kd_name);

	if (cmp > 0)
		return (1);
	if (cmp < 0)
		return (-1);
	return (0);
}
/*
 * Load the initial fuid domain and idx trees. This function is used by
 * both the kernel and zdb.
 *
 * Both trees are always created. The size of the packed table is read
 * from the bonus buffer of fuid_obj; when it is non-zero the packed
 * nvlist is read and unpacked, and one node per array element is added
 * to both trees.
 *
 * Returns the size of the packed table (0 when the table is empty).
 */
uint64_t
zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
    avl_tree_t *domain_tree)
{
	dmu_buf_t *db;
	uint64_t fuid_size;
	nvlist_t **fuidnvp;
	nvlist_t *nvp = NULL;
	uint_t count;
	uint_t n;
	char *packed;

	avl_create(idx_tree, idx_compare,
	    sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_idxnode));
	avl_create(domain_tree, domain_compare,
	    sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_domnode));

	VERIFY(0 == dmu_bonus_hold(os, fuid_obj, FTAG, &db));
	fuid_size = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	if (fuid_size == 0)
		return (0);

	packed = kmem_alloc(fuid_size, KM_SLEEP);
	VERIFY(dmu_read(os, fuid_obj, 0, fuid_size, packed) == 0);
	VERIFY(nvlist_unpack(packed, fuid_size, &nvp, 0) == 0);
	VERIFY(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY,
	    &fuidnvp, &count) == 0);

	for (n = 0; n < count; n++) {
		fuid_domain_t *node;
		char *domain;
		uint64_t idx;

		VERIFY(nvlist_lookup_string(fuidnvp[n], FUID_DOMAIN,
		    &domain) == 0);
		VERIFY(nvlist_lookup_uint64(fuidnvp[n], FUID_IDX,
		    &idx) == 0);

		node = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
		node->f_idx = idx;
		node->f_ksid = ksid_lookupdomain(domain);

		avl_add(idx_tree, node);
		avl_add(domain_tree, node);
	}

	nvlist_free(nvp);
	kmem_free(packed, fuid_size);

	return (fuid_size);
}
/*
 * Tear down both FUID trees. Every node sits in both trees, so the
 * walk over the domain tree only releases the ksid domain holds and the
 * walk over the index tree frees the nodes themselves.
 */
void
zfs_fuid_table_destroy(avl_tree_t *idx_tree, avl_tree_t *domain_tree)
{
	fuid_domain_t *node;
	void *cookie = NULL;

	while ((node = avl_destroy_nodes(domain_tree, &cookie)) != NULL)
		ksiddomain_rele(node->f_ksid);
	avl_destroy(domain_tree);

	cookie = NULL;
	while ((node = avl_destroy_nodes(idx_tree, &cookie)) != NULL)
		kmem_free(node, sizeof (fuid_domain_t));
	avl_destroy(idx_tree);
}
/*
 * Look up the domain string stored under index idx in the index tree.
 *
 * Returns the kd_name of the matching entry. If no entry has that index
 * (for example a FUID that refers to a table slot that was never
 * written), the empty "nobody" domain string is returned instead of
 * dereferencing a NULL lookup result.
 */
char *
zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx)
{
	fuid_domain_t searchnode, *findnode;
	avl_index_t loc;

	/* Only f_idx is consulted by the index tree's comparator. */
	searchnode.f_idx = idx;

	findnode = avl_find(idx_tree, &searchnode, &loc);
	if (findnode == NULL)
		return ("");

	return (findnode->f_ksid->kd_name);
}
#ifdef _KERNEL
/*
 * Load the fuid table(s) into memory.
 *
 * Runs under z_fuid_lock held as writer and does nothing if the table
 * has already been loaded. When the file system has no FUID object yet
 * and a transaction is supplied, the object is allocated and registered
 * in the master node before the table is loaded.
 */
static void
zfs_fuid_init(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
	int error = 0;

	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);

	if (!zfsvfs->z_fuid_loaded) {
		if (zfsvfs->z_fuid_obj == 0) {
			/* first make sure we need to allocate object */
			error = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
			    ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj);
			if (error == ENOENT && tx != NULL) {
				zfsvfs->z_fuid_obj =
				    dmu_object_alloc(zfsvfs->z_os,
				    DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
				    sizeof (uint64_t), tx);
				VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
				    ZFS_FUID_TABLES, sizeof (uint64_t), 1,
				    &zfsvfs->z_fuid_obj, tx) == 0);
			}
		}

		zfsvfs->z_fuid_size = zfs_fuid_table_load(zfsvfs->z_os,
		    zfsvfs->z_fuid_obj, &zfsvfs->z_fuid_idx,
		    &zfsvfs->z_fuid_domain);
		zfsvfs->z_fuid_loaded = B_TRUE;
	}

	rw_exit(&zfsvfs->z_fuid_lock);
}
/*
 * Query domain table for a given domain.
 *
 * If domain isn't found it is added to AVL trees and
 * the results are pushed out to disk.
 *
 *	IN:	zfsvfs	- file system whose FUID table is consulted
 *		domain	- domain string to look up
 *		tx	- transaction used when the table must be
 *			  created or rewritten
 *
 *	OUT:	retdomain - if non-NULL, set to the domain string ("" for
 *			  the dummy "nobody" domain)
 *
 *	RETURN:	index of the domain in the table, 0 for the "nobody"
 *		domain
 */
int
zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, char **retdomain,
    dmu_tx_t *tx)
{
	fuid_domain_t searchnode, *findnode;
	fuid_domain_t *domnode;
	avl_index_t loc;
	nvlist_t *nvp;
	nvlist_t **fuids;
	uint64_t retidx;
	size_t nvsize = 0;
	char *packed;
	dmu_buf_t *db;
	int i = 0;

	/*
	 * If the dummy "nobody" domain then return an index of 0
	 * to cause the created FUID to be a standard POSIX id
	 * for the user nobody.
	 */
	if (domain[0] == '\0') {
		/* retdomain is optional here, just as it is below. */
		if (retdomain != NULL)
			*retdomain = "";
		return (0);
	}

	searchnode.f_ksid = ksid_lookupdomain(domain);
	if (retdomain != NULL)
		*retdomain = searchnode.f_ksid->kd_name;

	if (!zfsvfs->z_fuid_loaded)
		zfs_fuid_init(zfsvfs, tx);

	/* Fast path: the domain is already known. */
	rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
	findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);
	rw_exit(&zfsvfs->z_fuid_lock);

	if (findnode != NULL) {
		ksiddomain_rele(searchnode.f_ksid);
		return (findnode->f_idx);
	}

	/* Allocate before taking the writer lock; KM_SLEEP may block. */
	domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
	domnode->f_ksid = searchnode.f_ksid;

	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);

	/*
	 * The reader lock was dropped above, so another thread may have
	 * added this domain in the meantime. Look again under the writer
	 * lock; inserting a duplicate into the trees is not allowed.
	 */
	findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);
	if (findnode != NULL) {
		retidx = findnode->f_idx;
		rw_exit(&zfsvfs->z_fuid_lock);
		ksiddomain_rele(searchnode.f_ksid);
		kmem_free(domnode, sizeof (fuid_domain_t));
		return (retidx);
	}

	/* The new index is also the node count once the node is added. */
	retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1;

	avl_add(&zfsvfs->z_fuid_domain, domnode);
	avl_add(&zfsvfs->z_fuid_idx, domnode);

	/*
	 * Now resync the on-disk nvlist.
	 */
	VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	domnode = avl_first(&zfsvfs->z_fuid_domain);
	fuids = kmem_alloc(retidx * sizeof (void *), KM_SLEEP);
	while (domnode) {
		VERIFY(nvlist_alloc(&fuids[i],
		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
		    domnode->f_idx) == 0);
		VERIFY(nvlist_add_uint64(fuids[i],
		    FUID_OFFSET, 0) == 0);
		VERIFY(nvlist_add_string(fuids[i++], FUID_DOMAIN,
		    domnode->f_ksid->kd_name) == 0);
		domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode);
	}
	VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
	    fuids, retidx) == 0);
	for (i = 0; i != retidx; i++)
		nvlist_free(fuids[i]);
	kmem_free(fuids, retidx * sizeof (void *));

	/* Pack the list and write it out as the new table contents. */
	VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
	packed = kmem_alloc(nvsize, KM_SLEEP);
	VERIFY(nvlist_pack(nvp, &packed, &nvsize,
	    NV_ENCODE_XDR, KM_SLEEP) == 0);
	nvlist_free(nvp);
	zfsvfs->z_fuid_size = nvsize;
	dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
	    zfsvfs->z_fuid_size, packed, tx);
	kmem_free(packed, zfsvfs->z_fuid_size);

	/* The packed size lives in the FUID object's bonus buffer. */
	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
	    FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
	dmu_buf_rele(db, FTAG);

	rw_exit(&zfsvfs->z_fuid_lock);
	return (retidx);
}
/*
 * Query domain table by index, returning domain string
 *
 * Returns the domain string held by the matching avl node, or NULL
 * when idx is 0 or the file system does not use FUIDs. The table is
 * loaded on first use.
 */
static char *
zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx)
{
	char *name;

	if (idx == 0)
		return (NULL);
	if (!zfsvfs->z_use_fuids)
		return (NULL);

	if (!zfsvfs->z_fuid_loaded)
		zfs_fuid_init(zfsvfs, NULL);

	rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
	name = zfs_fuid_idx_domain(&zfsvfs->z_fuid_idx, idx);
	rw_exit(&zfsvfs->z_fuid_lock);

	ASSERT(name);
	return (name);
}
/*
 * Map the owner and group FUIDs stored in zp to ids, returned through
 * uidp and gidp respectively.
 */
void
zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	*uidp = zfs_fuid_map_id(zfsvfs, zp->z_phys->zp_uid, cr, ZFS_OWNER);
	*gidp = zfs_fuid_map_id(zfsvfs, zp->z_phys->zp_gid, cr, ZFS_GROUP);
}
/*
 * Map a FUID to an id.
 *
 * A FUID whose index part is 0 is returned unchanged. Otherwise the
 * domain is looked up by index and the (domain, rid) pair is handed to
 * the kidmap uid lookup for ZFS_OWNER / ZFS_ACE_USER, or to the gid
 * lookup for every other type. The status of the kidmap call is
 * deliberately ignored.
 */
uid_t
zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
    cred_t *cr, zfs_fuid_type_t type)
{
	uint32_t index = FUID_INDEX(fuid);
	boolean_t is_user;
	char *domain;
	uid_t id;

	if (index == 0)
		return (fuid);

	domain = zfs_fuid_find_by_idx(zfsvfs, index);
	ASSERT(domain != NULL);

	is_user = (type == ZFS_OWNER || type == ZFS_ACE_USER);
	if (!is_user) {
		(void) kidmap_getgidbysid(crgetzone(cr), domain,
		    FUID_RID(fuid), &id);
	} else {
		(void) kidmap_getuidbysid(crgetzone(cr), domain,
		    FUID_RID(fuid), &id);
	}

	return (id);
}
/*
 * Add a FUID node to the list of fuid's being created for this
 * ACL
 *
 * If ACL has multiple domains, then keep only one copy of each unique
 * domain. The zfs_fuid_info_t is allocated on first use. The FUID that
 * is recorded uses the 1-based position of the domain in this info's
 * domain list as its index.
 */
static void
zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid,
    uint64_t idx, uint64_t id, zfs_fuid_type_t type)
{
	zfs_fuid_info_t *info;
	zfs_fuid_domain_t *dom;
	zfs_fuid_t *ace;
	uint64_t slot;
	uint64_t logfuid;

	if (*fuidpp == NULL)
		*fuidpp = zfs_fuid_info_alloc();
	info = *fuidpp;

	/*
	 * Walk the domain list looking for idx, counting positions as we
	 * go. If the walk runs off the end, slot is one past the last
	 * entry, which is where the new entry is appended.
	 */
	slot = 1;
	dom = list_head(&info->z_domains);
	while (dom != NULL && dom->z_domidx != idx) {
		dom = list_next(&info->z_domains, dom);
		slot++;
	}

	if (dom == NULL) {
		dom = kmem_alloc(sizeof (zfs_fuid_domain_t), KM_SLEEP);
		dom->z_domain = domain;
		dom->z_domidx = idx;
		list_insert_tail(&info->z_domains, dom);
		info->z_domain_str_sz += strlen(domain) + 1;
		info->z_domain_cnt++;
	}

	logfuid = FUID_ENCODE(slot, rid);

	if (type == ZFS_ACE_USER || type == ZFS_ACE_GROUP) {
		/*
		 * Now allocate fuid entry and add it on the end of the list
		 */
		ace = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP);
		ace->z_id = id;
		ace->z_domidx = idx;
		ace->z_logfuid = logfuid;
		list_insert_tail(&info->z_fuids, ace);
		info->z_fuid_cnt++;
	} else if (type == ZFS_OWNER) {
		info->z_fuid_owner = logfuid;
	} else {
		info->z_fuid_group = logfuid;
	}
}
/*
 * Create a file system FUID, based on information in the users cred
 *
 * type must be ZFS_OWNER or ZFS_GROUP. The cred's uid or gid is
 * returned unchanged when the file system does not use FUIDs or the id
 * is not ephemeral. Otherwise the SID in the cred supplies the domain
 * and rid; the domain is looked up (or added) in the FUID table and the
 * result is recorded in *fuidp.
 */
uint64_t
zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
    dmu_tx_t *tx, cred_t *cr, zfs_fuid_info_t **fuidp)
{
	const char *domain;
	char *kdomain;
	ksid_t *ksid;
	uint64_t idx;
	uint32_t rid;
	uid_t id;

	VERIFY(type == ZFS_OWNER || type == ZFS_GROUP);

	id = (type == ZFS_OWNER) ? crgetuid(cr) : crgetgid(cr);

	if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id))
		return ((uint64_t)id);

	ksid = crgetsid(cr, (type == ZFS_OWNER) ? KSID_OWNER : KSID_GROUP);
	VERIFY(ksid != NULL);

	rid = ksid_getrid(ksid);
	domain = ksid_getdomain(ksid);

	idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, tx);
	zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type);

	return (FUID_ENCODE(idx, rid));
}
/*
 * Create a file system FUID for an ACL ace
 * or a chown/chgrp of the file.
 * This is similar to zfs_fuid_create_cred, except that
 * we can't find the domain + rid information in the
 * cred. Instead we have to query Winchester for the
 * domain and rid.
 *
 * During replay operations the domain+rid information is
 * found in the zfs_fuid_info_t that the replay code has
 * attached to the zfsvfs of the file system.
 *
 * IN: zfsvfs - file system the FUID is created for
 * id - id to convert
 * cr - credentials of caller (supplies the zone for kidmap)
 * type - which kind of id this is (owner, group, ACE user/group)
 * tx - transaction passed on to the domain table lookup
 *
 * IN/OUT: fuidpp - FUID info list the new FUID is recorded in
 * (not used during replay)
 *
 * RETURN: the id unchanged when no conversion is needed, UID_NOBODY
 * when replaying without logged FUID info, otherwise the
 * encoded FUID
 */
uint64_t
zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr,
zfs_fuid_type_t type, dmu_tx_t *tx, zfs_fuid_info_t **fuidpp)
{
const char *domain;
char *kdomain;
uint32_t fuid_idx = FUID_INDEX(id);
uint32_t rid;
idmap_stat status;
uint64_t idx;
/* z_assign >= TXG_INITIAL is how this file detects ZIL replay */
boolean_t is_replay = (zfsvfs->z_assign >= TXG_INITIAL);
zfs_fuid_t *zfuid = NULL;
zfs_fuid_info_t *fuidp;
/*
 * If POSIX ID, or entry is already a FUID then
 * just return the id
 *
 * We may also be handed an already FUID'ized id via
 * chmod.
 */
if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0)
return (id);
if (is_replay) {
fuidp = zfsvfs->z_fuid_replay;
/*
 * If we are passed an ephemeral id, but no
 * fuid_info was logged then return NOBODY.
 * This is most likely a result of idmap service
 * not being available.
 */
if (fuidp == NULL)
return (UID_NOBODY);
/*
 * Pull the logged rid and log-relative domain index for this id.
 * NOTE(review): no default case, so rid and idx stay unset for any
 * other type; the ACE cases also assume z_fuids is non-empty.
 */
switch (type) {
case ZFS_ACE_USER:
case ZFS_ACE_GROUP:
zfuid = list_head(&fuidp->z_fuids);
rid = FUID_RID(zfuid->z_logfuid);
idx = FUID_INDEX(zfuid->z_logfuid);
break;
case ZFS_OWNER:
rid = FUID_RID(fuidp->z_fuid_owner);
idx = FUID_INDEX(fuidp->z_fuid_owner);
break;
case ZFS_GROUP:
rid = FUID_RID(fuidp->z_fuid_group);
idx = FUID_INDEX(fuidp->z_fuid_group);
break;
};
/*
 * Logged indexes are 1-based positions in the domain table.
 * NOTE(review): idx == 0 would index before the table - confirm it
 * cannot be logged.
 */
domain = fuidp->z_domain_table[idx -1];
} else {
/* Not replaying: ask kidmap for the SID that corresponds to id. */
if (type == ZFS_OWNER || type == ZFS_ACE_USER)
status = kidmap_getsidbyuid(crgetzone(cr), id,
&domain, &rid);
else
status = kidmap_getsidbygid(crgetzone(cr), id,
&domain, &rid);
if (status != 0) {
/*
 * When returning nobody we will need to
 * make a dummy fuid table entry for logging
 * purposes.
 */
rid = UID_NOBODY;
domain = "";
}
}
/* Translate the domain string into this file system's table index. */
idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, tx);
if (!is_replay)
zfs_fuid_node_add(fuidpp, kdomain, rid, idx, id, type);
else if (zfuid != NULL) {
/* Replay consumed this logged ACE FUID; drop it from the list. */
list_remove(&fuidp->z_fuids, zfuid);
kmem_free(zfuid, sizeof (zfs_fuid_t));
}
return (FUID_ENCODE(idx, rid));
}
/*
 * Destroy the in-core FUID trees of a file system, if they were ever
 * loaded. Runs under z_fuid_lock held as writer.
 */
void
zfs_fuid_destroy(zfsvfs_t *zfsvfs)
{
	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
	if (zfsvfs->z_fuid_loaded) {
		zfs_fuid_table_destroy(&zfsvfs->z_fuid_idx,
		    &zfsvfs->z_fuid_domain);
	}
	rw_exit(&zfsvfs->z_fuid_lock);
}
/*
 * Allocate zfs_fuid_info for tracking FUIDs created during
 * zfs_mknode, VOP_SETATTR() or VOP_SETSECATTR()
 *
 * The structure is zero-filled and its domain and fuid lists are
 * created empty.
 */
zfs_fuid_info_t *
zfs_fuid_info_alloc(void)
{
	zfs_fuid_info_t *info = kmem_zalloc(sizeof (*info), KM_SLEEP);

	list_create(&info->z_domains, sizeof (zfs_fuid_domain_t),
	    offsetof(zfs_fuid_domain_t, z_next));
	list_create(&info->z_fuids, sizeof (zfs_fuid_t),
	    offsetof(zfs_fuid_t, z_next));

	return (info);
}
/*
 * Release all memory associated with zfs_fuid_info_t: every entry on
 * the fuid list, the domain table (if one was attached), every entry on
 * the domain list, and finally the structure itself.
 */
void
zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
{
	zfs_fuid_t *ace;
	zfs_fuid_domain_t *dom;

	ace = list_head(&fuidp->z_fuids);
	while (ace != NULL) {
		list_remove(&fuidp->z_fuids, ace);
		kmem_free(ace, sizeof (zfs_fuid_t));
		ace = list_head(&fuidp->z_fuids);
	}

	if (fuidp->z_domain_table != NULL) {
		kmem_free(fuidp->z_domain_table,
		    (sizeof (char **)) * fuidp->z_domain_cnt);
	}

	dom = list_head(&fuidp->z_domains);
	while (dom != NULL) {
		list_remove(&fuidp->z_domains, dom);
		kmem_free(dom, sizeof (zfs_fuid_domain_t));
		dom = list_head(&fuidp->z_domains);
	}

	kmem_free(fuidp, sizeof (zfs_fuid_info_t));
}
/*
 * Check to see if id is a groupmember. If cred
 * has ksid info then sidlist is checked first
 * and if still not found then POSIX groups are checked
 *
 * Will use a straight FUID compare when possible.
 */
boolean_t
zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
{
	ksid_t *ksid = crgetsid(cr, KSID_GROUP);
	uid_t gid;

	if (ksid != NULL) {
		ksidlist_t *sidlist = crgetsidlist(cr);
		uint32_t idx = FUID_INDEX(id);
		uint32_t rid = FUID_RID(id);
		ksid_t *groups;
		int i;

		ASSERT(sidlist);
		groups = sidlist->ksl_sids;

		for (i = 0; i != sidlist->ksl_nsid; i++) {
			char *domain;

			if (idx == 0) {
				/* Plain id: compare directly. */
				if (id != IDMAP_WK_CREATOR_GROUP_GID &&
				    id == groups[i].ks_id)
					return (B_TRUE);
				continue;
			}

			/* FUID: compare domain string and rid. */
			domain = zfs_fuid_find_by_idx(zfsvfs, idx);
			ASSERT(domain != NULL);

			if (strcmp(domain,
			    IDMAP_WK_CREATOR_SID_AUTHORITY) == 0)
				return (B_FALSE);

			if (strcmp(domain,
			    groups[i].ks_domain->kd_name) == 0 &&
			    rid == groups[i].ks_rid)
				return (B_TRUE);
		}
	}

	/*
	 * Not found in ksidlist, check posix groups
	 */
	gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP);
	return (groupmember(gid, cr));
}
#endif
File diff suppressed because it is too large Load Diff
+693
View File
@@ -0,0 +1,693 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "@(#)zfs_log.c 1.13 08/04/09 SMI"
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/file.h>
#include <sys/vfs.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_dir.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/byteorder.h>
#include <sys/policy.h>
#include <sys/stat.h>
#include <sys/mode.h>
#include <sys/acl.h>
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/zfs_fuid.h>
#include <sys/ddi.h>
/*
* All the functions in this file are used to construct the log entries
* to record transactions. They allocate an intent log transaction
* structure (itx_t) and save within it all the information necessary to
* possibly replay the transaction. The itx is then assigned a sequence
* number and inserted in the in-memory list anchored in the zilog.
*/
/*
 * Pick the log transaction type for a create.
 *
 * The result depends on the kind of object (file, directory or xattr
 * directory), on whether an ACL was supplied (vsecp != NULL) and on
 * whether the attributes carry AT_XVATTR. An unknown type trips an
 * ASSERT and yields TX_MAX_TYPE.
 */
int
zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap)
{
	int isxvattr = (vap->va_mask & AT_XVATTR);

	switch (type) {
	case Z_FILE:
		if (vsecp != NULL)
			return (isxvattr ? TX_CREATE_ACL_ATTR : TX_CREATE_ACL);
		return (isxvattr ? TX_CREATE_ATTR : TX_CREATE);
	case Z_DIR:
		if (vsecp != NULL)
			return (isxvattr ? TX_MKDIR_ACL_ATTR : TX_MKDIR_ACL);
		return (isxvattr ? TX_MKDIR_ATTR : TX_MKDIR);
	case Z_XATTRDIR:
		return (TX_MKXATTR);
	}

	ASSERT(0);
	return (TX_MAX_TYPE);
}
/*
 * build up the log data necessary for logging xvattr_t
 * First lr_attr_t is initialized. following the lr_attr_t
 * is the mapsize and attribute bitmap copied from the xvattr_t.
 * Following the bitmap and bitmapsize two 64 bit words are reserved
 * for the create time which may be set. Following the create time
 * records a single 64 bit integer which has the bits to set on
 * replay for the xvattr.
 *
 *	IN:	xvap	- extended attributes to log
 *
 *	OUT:	lrattr	- start of the log area to fill in; the caller
 *			  must have sized it for xvap->xva_mapsize
 */
static void
zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
{
	uint32_t *bitmap;
	uint64_t *attrs;
	uint64_t *crtime;
	xoptattr_t *xoap;
	void *scanstamp;
	int i;

	xoap = xva_getxoptattr(xvap);
	ASSERT(xoap);

	/* Copy the request bitmap word by word. */
	lrattr->lr_attr_masksize = xvap->xva_mapsize;
	bitmap = &lrattr->lr_attr_bitmap;
	for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) {
		*bitmap = xvap->xva_reqattrmap[i];
	}

	/*
	 * Layout after the bitmap: one word of attribute bits, two words
	 * of create time, then the scanstamp.
	 */
	/* Now pack the attributes up in a single uint64_t */
	attrs = (uint64_t *)bitmap;
	crtime = attrs + 1;
	scanstamp = (caddr_t)(crtime + 2);
	*attrs = 0;

	/* A bit is set only if the attribute is requested and non-zero. */
	if (XVA_ISSET_REQ(xvap, XAT_READONLY))
		*attrs |= (xoap->xoa_readonly == 0) ? 0 :
		    XAT0_READONLY;
	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN))
		*attrs |= (xoap->xoa_hidden == 0) ? 0 :
		    XAT0_HIDDEN;
	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM))
		*attrs |= (xoap->xoa_system == 0) ? 0 :
		    XAT0_SYSTEM;
	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE))
		*attrs |= (xoap->xoa_archive == 0) ? 0 :
		    XAT0_ARCHIVE;
	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE))
		*attrs |= (xoap->xoa_immutable == 0) ? 0 :
		    XAT0_IMMUTABLE;
	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK))
		*attrs |= (xoap->xoa_nounlink == 0) ? 0 :
		    XAT0_NOUNLINK;
	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY))
		*attrs |= (xoap->xoa_appendonly == 0) ? 0 :
		    XAT0_APPENDONLY;
	/* The opaque attribute has its own bit, distinct from appendonly. */
	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE))
		*attrs |= (xoap->xoa_opaque == 0) ? 0 :
		    XAT0_OPAQUE;
	if (XVA_ISSET_REQ(xvap, XAT_NODUMP))
		*attrs |= (xoap->xoa_nodump == 0) ? 0 :
		    XAT0_NODUMP;
	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED))
		*attrs |= (xoap->xoa_av_quarantined == 0) ? 0 :
		    XAT0_AV_QUARANTINED;
	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED))
		*attrs |= (xoap->xoa_av_modified == 0) ? 0 :
		    XAT0_AV_MODIFIED;

	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
		ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime);
	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
		bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ);
}
/*
 * Copy the log FUID of every entry on fuidp's fuid list into the
 * buffer at start, one uint64_t each, and return the address just past
 * the last one written.
 */
static void *
zfs_log_fuid_ids(zfs_fuid_info_t *fuidp, void *start)
{
	uint64_t *out = start;
	zfs_fuid_t *ace = list_head(&fuidp->z_fuids);

	/* First copy in the ACE FUIDs */
	while (ace != NULL) {
		*out = ace->z_logfuid;
		out++;
		ace = list_next(&fuidp->z_fuids, ace);
	}

	return (out);
}
/*
 * Copy every domain string on fuidp's domain list, each with its
 * terminating NUL, back to back into the buffer at start. Returns the
 * address just past the last byte written (start itself when there is
 * no domain data).
 */
static void *
zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start)
{
	zfs_fuid_domain_t *dom;
	caddr_t out = start;

	/* now copy in the domain info, if any */
	if (fuidp->z_domain_str_sz == 0)
		return (start);

	for (dom = list_head(&fuidp->z_domains); dom != NULL;
	    dom = list_next(&fuidp->z_domains, dom)) {
		size_t len = strlen(dom->z_domain) + 1;

		bcopy((void *)dom->z_domain, out, len);
		out += len;
	}

	return (out);
}
/*
 * zfs_log_create() is used to handle TX_CREATE, TX_CREATE_ATTR, TX_MKDIR,
 * TX_MKDIR_ATTR and TX_MKXATTR
 * transactions.
 *
 * TX_CREATE and TX_MKDIR are standard creates, but they may have FUID
 * domain information appended prior to the name. In this case the
 * uid/gid in the log record will be a log centric FUID.
 *
 * TX_CREATE_ACL_ATTR and TX_MKDIR_ACL_ATTR handle special creates that
 * may contain attributes, ACL and optional fuid information.
 *
 * TX_CREATE_ACL and TX_MKDIR_ACL handle special creates that specify
 * an ACL and normal users/groups in the ACEs.
 *
 * There may be an optional xvattr attribute information similar
 * to zfs_log_setattr.
 *
 * Also, after the file name "domain" strings may be appended.
 *
 * The record is laid out by this function in the following order:
 * fixed header, optional xvattr data, optional ACL entries, optional
 * ACE FUIDs, optional domain strings, and finally the name.
 *
 * Nothing is logged when zilog is NULL.
 */
void
zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp,
zfs_fuid_info_t *fuidp, vattr_t *vap)
{
itx_t *itx;
uint64_t seq;
lr_create_t *lr;
lr_acl_create_t *lracl;
size_t aclsize;
size_t xvatsize = 0;
size_t txsize;
xvattr_t *xvap = (xvattr_t *)vap;
void *end;
size_t lrsize;
size_t namesize = strlen(name) + 1;
size_t fuidsz = 0;
if (zilog == NULL)
return;
/*
 * If we have FUIDs present then add in space for
 * domains and ACE fuid's if any.
 */
if (fuidp) {
fuidsz += fuidp->z_domain_str_sz;
fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t);
}
if (vap->va_mask & AT_XVATTR)
xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize);
/*
 * The non-ACL transaction types use the plain lr_create_t header; all
 * others use lr_acl_create_t and reserve room for the ACL entries.
 * NOTE(review): aclsize is only assigned in the else branch but is read
 * below whenever vsecp is non-NULL - presumably callers never pass an
 * ACL with a non-ACL txtype; confirm.
 */
if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR ||
(int)txtype == TX_CREATE || (int)txtype == TX_MKDIR ||
(int)txtype == TX_MKXATTR) {
txsize = sizeof (*lr) + namesize + fuidsz + xvatsize;
lrsize = sizeof (*lr);
} else {
aclsize = (vsecp) ? vsecp->vsa_aclentsz : 0;
txsize =
sizeof (lr_acl_create_t) + namesize + fuidsz +
ZIL_ACE_LENGTH(aclsize) + xvatsize;
lrsize = sizeof (lr_acl_create_t);
}
itx = zil_itx_create(txtype, txsize);
lr = (lr_create_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
lr->lr_foid = zp->z_id;
lr->lr_mode = zp->z_phys->zp_mode;
/*
 * Ephemeral ids are logged as the log-relative FUIDs kept in fuidp.
 * NOTE(review): fuidp is dereferenced without a NULL check here -
 * presumably it is always supplied when the id is ephemeral; confirm.
 */
if (!IS_EPHEMERAL(zp->z_phys->zp_uid)) {
lr->lr_uid = (uint64_t)zp->z_phys->zp_uid;
} else {
lr->lr_uid = fuidp->z_fuid_owner;
}
if (!IS_EPHEMERAL(zp->z_phys->zp_gid)) {
lr->lr_gid = (uint64_t)zp->z_phys->zp_gid;
} else {
lr->lr_gid = fuidp->z_fuid_group;
}
lr->lr_gen = zp->z_phys->zp_gen;
lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
lr->lr_rdev = zp->z_phys->zp_rdev;
/*
 * Fill in xvattr info if any
 */
/* "end" tracks where the next variable-length piece is written. */
if (vap->va_mask & AT_XVATTR) {
zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap);
end = (caddr_t)lr + lrsize + xvatsize;
} else {
end = (caddr_t)lr + lrsize;
}
/* Now fill in any ACL info */
if (vsecp) {
lracl = (lr_acl_create_t *)&itx->itx_lr;
lracl->lr_aclcnt = vsecp->vsa_aclcnt;
lracl->lr_acl_bytes = aclsize;
lracl->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0;
lracl->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0;
if (vsecp->vsa_aclflags & VSA_ACE_ACLFLAGS)
lracl->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags;
else
lracl->lr_acl_flags = 0;
bcopy(vsecp->vsa_aclentp, end, aclsize);
end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize);
}
/* drop in FUID info */
if (fuidp) {
end = zfs_log_fuid_ids(fuidp, end);
end = zfs_log_fuid_domains(fuidp, end);
}
/*
 * Now place file name in log record
 */
bcopy(name, end, namesize);
/* Both the directory and the new object remember this itx's sequence. */
seq = zil_itx_assign(zilog, itx, tx);
dzp->z_last_itx = seq;
zp->z_last_itx = seq;
}
/*
 * zfs_log_remove() handles both TX_REMOVE and TX_RMDIR transactions.
 *
 * The record holds the directory's object id followed by the name of
 * the entry being removed. Nothing is logged when zilog is NULL.
 */
void
zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, char *name)
{
	size_t namesize = strlen(name) + 1;
	lr_remove_t *lr;
	itx_t *itx;
	char *payload;
	uint64_t seq;

	if (zilog == NULL)
		return;

	itx = zil_itx_create(txtype, sizeof (lr_remove_t) + namesize);
	lr = (lr_remove_t *)&itx->itx_lr;
	lr->lr_doid = dzp->z_id;

	/* The name is stored immediately after the fixed record. */
	payload = (char *)(lr + 1);
	bcopy(name, payload, namesize);

	seq = zil_itx_assign(zilog, itx, tx);
	dzp->z_last_itx = seq;
}
/*
 * zfs_log_link() handles TX_LINK transactions.
 *
 * The record holds the directory's object id, the object id of the
 * file being linked, and the new name. Nothing is logged when zilog is
 * NULL.
 */
void
zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name)
{
	size_t namesize = strlen(name) + 1;
	lr_link_t *lr;
	itx_t *itx;
	char *payload;
	uint64_t seq;

	if (zilog == NULL)
		return;

	itx = zil_itx_create(txtype, sizeof (lr_link_t) + namesize);
	lr = (lr_link_t *)&itx->itx_lr;
	lr->lr_doid = dzp->z_id;
	lr->lr_link_obj = zp->z_id;

	/* The name is stored immediately after the fixed record. */
	payload = (char *)(lr + 1);
	bcopy(name, payload, namesize);

	seq = zil_itx_assign(zilog, itx, tx);
	dzp->z_last_itx = seq;
	zp->z_last_itx = seq;
}
/*
 * zfs_log_symlink() handles TX_SYMLINK transactions.
 *
 * The record is an lr_create_t filled in from the new znode, followed
 * by the name of the symlink and then its target string. Nothing is
 * logged when zilog is NULL.
 */
void
zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name, char *link)
{
	size_t namesize = strlen(name) + 1;
	size_t linksize = strlen(link) + 1;
	lr_create_t *lr;
	itx_t *itx;
	char *payload;
	uint64_t seq;

	if (zilog == NULL)
		return;

	itx = zil_itx_create(txtype,
	    sizeof (lr_create_t) + namesize + linksize);
	lr = (lr_create_t *)&itx->itx_lr;

	lr->lr_doid = dzp->z_id;
	lr->lr_foid = zp->z_id;
	lr->lr_mode = zp->z_phys->zp_mode;
	lr->lr_uid = zp->z_phys->zp_uid;
	lr->lr_gid = zp->z_phys->zp_gid;
	lr->lr_gen = zp->z_phys->zp_gen;
	lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
	lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];

	/* Name first, link target right behind it. */
	payload = (char *)(lr + 1);
	bcopy(name, payload, namesize);
	bcopy(link, payload + namesize, linksize);

	seq = zil_itx_assign(zilog, itx, tx);
	dzp->z_last_itx = seq;
	zp->z_last_itx = seq;
}
/*
 * zfs_log_rename() handles TX_RENAME transactions.
 *
 * The record holds the source and target directory object ids followed
 * by the source name and then the destination name. Nothing is logged
 * when zilog is NULL.
 */
void
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
{
	size_t snamesize = strlen(sname) + 1;
	size_t dnamesize = strlen(dname) + 1;
	lr_rename_t *lr;
	itx_t *itx;
	char *names;
	uint64_t seq;

	if (zilog == NULL)
		return;

	itx = zil_itx_create(txtype,
	    sizeof (lr_rename_t) + snamesize + dnamesize);
	lr = (lr_rename_t *)&itx->itx_lr;
	lr->lr_sdoid = sdzp->z_id;
	lr->lr_tdoid = tdzp->z_id;

	/* Source name first, destination name right behind it. */
	names = (char *)(lr + 1);
	bcopy(sname, names, snamesize);
	bcopy(dname, names + snamesize, dnamesize);

	seq = zil_itx_assign(zilog, itx, tx);
	sdzp->z_last_itx = seq;
	tdzp->z_last_itx = seq;
	szp->z_last_itx = seq;
}
/*
* zfs_log_write() handles TX_WRITE transactions.
*/
/*
 * Size threshold used by zfs_log_write(): a write larger than this, on
 * a pool without separate log devices, is logged as WR_INDIRECT.
 */
ssize_t zfs_immediate_write_sz = 32768;
/*
 * A maximum-size block less the ZIL trailer and the lr_write_t header.
 * zfs_log_write() splits a write that exceeds this when the pool has
 * separate log devices.
 */
#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \
sizeof (lr_write_t))
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t resid, int ioflag)
{
itx_wr_state_t write_state;
boolean_t slogging;
uintptr_t fsync_cnt;
if (zilog == NULL || zp->z_unlinked)
return;
/*
* Writes are handled in three different ways:
*
* WR_INDIRECT:
* If the write is greater than zfs_immediate_write_sz and there are
* no separate logs in this pool then later *if* we need to log the
* write then dmu_sync() is used to immediately write the block and
* its block pointer is put in the log record.
* WR_COPIED:
* If we know we'll immediately be committing the
* transaction (FSYNC or FDSYNC), the we allocate a larger
* log record here for the data and copy the data in.
* WR_NEED_COPY:
* Otherwise we don't allocate a buffer, and *if* we need to
* flush the write later then a buffer is allocated and
* we retrieve the data using the dmu.
*/
slogging = spa_has_slogs(zilog->zl_spa);
if (resid > zfs_immediate_write_sz && !slogging)
write_state = WR_INDIRECT;
else if (ioflag & (FSYNC | FDSYNC))
write_state = WR_COPIED;
else
write_state = WR_NEED_COPY;
if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
}
while (resid) {
itx_t *itx;
lr_write_t *lr;
ssize_t len;
/*
* If there are slogs and the write would overflow the largest
* block, then because we don't want to use the main pool
* to dmu_sync, we have to split the write.
*/
if (slogging && resid > ZIL_MAX_LOG_DATA)
len = SPA_MAXBLOCKSIZE >> 1;
else
len = resid;
itx = zil_itx_create(txtype, sizeof (*lr) +
(write_state == WR_COPIED ? len : 0));
lr = (lr_write_t *)&itx->itx_lr;
if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
zp->z_id, off, len, lr + 1) != 0) {
kmem_free(itx, offsetof(itx_t, itx_lr) +
itx->itx_lr.lrc_reclen);
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
write_state = WR_NEED_COPY;
}
itx->itx_wr_state = write_state;
if (write_state == WR_NEED_COPY)
itx->itx_sod += len;
lr->lr_foid = zp->z_id;
lr->lr_offset = off;
lr->lr_length = len;
lr->lr_blkoff = 0;
BP_ZERO(&lr->lr_blkptr);
itx->itx_private = zp->z_zfsvfs;
if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0) ||
(ioflag & (FSYNC | FDSYNC)))
itx->itx_sync = B_TRUE;
else
itx->itx_sync = B_FALSE;
zp->z_last_itx = zil_itx_assign(zilog, itx, tx);
off += len;
resid -= len;
}
}
/*
 * zfs_log_truncate() handles TX_TRUNCATE transactions.
 *
 * Logs a fixed-size lr_truncate_t carrying the object id, offset and
 * length.  Nothing is logged when zilog is NULL or the znode has been
 * unlinked.
 */
void
zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, uint64_t off, uint64_t len)
{
	lr_truncate_t *rec;
	itx_t *itx;
	uint64_t seq;

	if (zilog == NULL)
		return;
	if (zp->z_unlinked)
		return;

	itx = zil_itx_create(txtype, sizeof (*rec));
	rec = (lr_truncate_t *)&itx->itx_lr;
	rec->lr_foid = zp->z_id;
	rec->lr_offset = off;
	rec->lr_length = len;

	/* Synchronous only if the znode has a non-zero z_sync_cnt. */
	itx->itx_sync = (zp->z_sync_cnt != 0);

	seq = zil_itx_assign(zilog, itx, tx);
	zp->z_last_itx = seq;
}
/*
 * zfs_log_setattr() handles TX_SETATTR transactions.
 *
 * The record is an lr_setattr_t, optionally followed by the packed
 * xvattr section (when AT_XVATTR is set in vap->va_mask) and then by
 * the FUID domain strings (when fuidp is non-NULL).  Nothing is logged
 * when zilog is NULL or the znode has been unlinked.
 */
void
zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp)
{
	xvattr_t *xvap = (xvattr_t *)vap;
	size_t recsize = sizeof (lr_setattr_t);
	lr_setattr_t *rec;
	void *tail;
	itx_t *itx;
	uint64_t seq;

	if (zilog == NULL || zp->z_unlinked)
		return;

	/*
	 * Size the record: fixed header, plus the xvattr section when
	 * present, plus the FUID domain strings when present.
	 */
	if (vap->va_mask & AT_XVATTR)
		recsize = sizeof (*rec) + ZIL_XVAT_SIZE(xvap->xva_mapsize);
	if (fuidp)
		recsize += fuidp->z_domain_str_sz;

	itx = zil_itx_create(txtype, recsize);
	rec = (lr_setattr_t *)&itx->itx_lr;

	rec->lr_foid = zp->z_id;
	rec->lr_mask = (uint64_t)mask_applied;
	rec->lr_mode = (uint64_t)vap->va_mode;

	/* Ephemeral ids being applied are logged as their FUIDs. */
	rec->lr_uid = ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ?
	    fuidp->z_fuid_owner : (uint64_t)vap->va_uid;
	rec->lr_gid = ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ?
	    fuidp->z_fuid_group : (uint64_t)vap->va_gid;

	rec->lr_size = (uint64_t)vap->va_size;
	ZFS_TIME_ENCODE(&vap->va_atime, rec->lr_atime);
	ZFS_TIME_ENCODE(&vap->va_mtime, rec->lr_mtime);

	/* Variable-length data begins right after the fixed header. */
	tail = (lr_setattr_t *)(rec + 1);
	if (vap->va_mask & AT_XVATTR) {
		zfs_log_xvattr((lr_attr_t *)tail, xvap);
		tail = (caddr_t)tail + ZIL_XVAT_SIZE(xvap->xva_mapsize);
	}

	/* Domain information, if any, goes on the end. */
	if (fuidp)
		(void) zfs_log_fuid_domains(fuidp, tail);

	itx->itx_sync = (zp->z_sync_cnt != 0);
	seq = zil_itx_assign(zilog, itx, tx);
	zp->z_last_itx = seq;
}
/*
 * zfs_log_acl() handles TX_ACL transactions.
 *
 * The transaction type is TX_ACL_V0 when the file system is at
 * ZPL_VERSION_INITIAL and TX_ACL otherwise.
 *
 * TX_ACL_V0 record: lr_acl_v0_t followed by the raw ACE bytes.
 * TX_ACL record:    lr_acl_t, the ACE bytes padded to
 *                   ZIL_ACE_LENGTH(aclbytes), then (when fuidp is
 *                   non-NULL) the 64-bit FUID ids followed by the FUID
 *                   domain strings.
 *
 * Nothing is logged, and neither vsecp nor zp->z_zfsvfs is touched, when
 * zilog is NULL or the znode has been unlinked.
 */
void
zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
    vsecattr_t *vsecp, zfs_fuid_info_t *fuidp)
{
	itx_t *itx;
	uint64_t seq;
	lr_acl_v0_t *lrv0;
	lr_acl_t *lr;
	int txtype;
	int lrsize;
	size_t txsize;
	size_t aclbytes;

	/* Bail out before dereferencing anything we will not need. */
	if (zilog == NULL || zp->z_unlinked)
		return;

	aclbytes = vsecp->vsa_aclentsz;

	txtype = (zp->z_zfsvfs->z_version == ZPL_VERSION_INITIAL) ?
	    TX_ACL_V0 : TX_ACL;

	if (txtype == TX_ACL)
		lrsize = sizeof (*lr);
	else
		lrsize = sizeof (*lrv0);

	/*
	 * Each logged FUID id is one uint64_t; the replay side reads and
	 * byteswaps them as uint64_t, so size them the same way here.
	 */
	txsize = lrsize +
	    ((txtype == TX_ACL) ? ZIL_ACE_LENGTH(aclbytes) : aclbytes) +
	    (fuidp ? fuidp->z_domain_str_sz : 0) +
	    sizeof (uint64_t) * (fuidp ? fuidp->z_fuid_cnt : 0);

	itx = zil_itx_create(txtype, txsize);

	lr = (lr_acl_t *)&itx->itx_lr;
	lr->lr_foid = zp->z_id;
	if (txtype == TX_ACL) {
		lr->lr_acl_bytes = aclbytes;
		lr->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0;
		lr->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0;
		if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS)
			lr->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags;
		else
			lr->lr_acl_flags = 0;
	}
	lr->lr_aclcnt = (uint64_t)vsecp->vsa_aclcnt;

	if (txtype == TX_ACL_V0) {
		/* Old format: ACEs directly follow the shorter header. */
		lrv0 = (lr_acl_v0_t *)lr;
		bcopy(vsecp->vsa_aclentp, (ace_t *)(lrv0 + 1), aclbytes);
	} else {
		void *start = (ace_t *)(lr + 1);

		bcopy(vsecp->vsa_aclentp, start, aclbytes);

		/* FUID data starts after the padded ACE area. */
		start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes);

		if (fuidp) {
			start = zfs_log_fuid_ids(fuidp, start);
			(void) zfs_log_fuid_domains(fuidp, start);
		}
	}

	itx->itx_sync = (zp->z_sync_cnt != 0);
	seq = zil_itx_assign(zilog, itx, tx);
	zp->z_last_itx = seq;
}
+876
View File
@@ -0,0 +1,876 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "@(#)zfs_replay.c 1.7 08/01/14 SMI"
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/vfs.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_acl.h>
#include <sys/zfs_fuid.h>
#include <sys/spa.h>
#include <sys/zil.h>
#include <sys/byteorder.h>
#include <sys/stat.h>
#include <sys/mode.h>
#include <sys/acl.h>
#include <sys/atomic.h>
#include <sys/cred.h>
/*
* Functions to replay ZFS intent log (ZIL) records
* The functions are called through a function vector (zfs_replay_vector)
* which is indexed by the transaction type.
*/
/*
 * Zero a vattr_t and fill it from values carried in a log record.
 *
 * The type and permission bits are both derived from "mode".  An
 * ephemeral uid or gid is replaced by -1 here.
 */
static void
zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode,
    uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid)
{
	bzero(vap, sizeof (*vap));
	vap->va_mask = (uint_t)mask;
	vap->va_type = IFTOVT(mode);
	vap->va_mode = mode & MODEMASK;
	/*
	 * Cast the selected value rather than the condition, so the
	 * narrowing from uint64_t to uid_t/gid_t is explicit.
	 */
	vap->va_uid = IS_EPHEMERAL(uid) ? (uid_t)-1 : (uid_t)uid;
	vap->va_gid = IS_EPHEMERAL(gid) ? (gid_t)-1 : (gid_t)gid;
	vap->va_rdev = zfs_cmpldev(rdev);
	vap->va_nodeid = nodeid;
}
/*
 * Replay handler that ignores all of its arguments and always fails with
 * ENOTSUP.  It occupies slot 0 ("no such transaction type") of
 * zfs_replay_vector.
 */
/* ARGSUSED */
static int
zfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap)
{
return (ENOTSUP);
}
/*
 * Unpack the optional-attribute section of a log record into an xvattr_t.
 *
 * The section begins with an lr_attr_t (the mask size and the first
 * 32-bit bitmap word), followed by the remaining bitmap words, one
 * 64-bit word of XAT0_* flag bits, two 64-bit words of create time and
 * then the anti-virus scanstamp bytes.  Only attributes whose request
 * bit is set in the copied bitmap are transferred.
 *
 * If xva_getxoptattr() returns NULL, AT_XVATTR is cleared again and
 * nothing is unpacked.
 */
static void
zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap)
{
	xoptattr_t *xoap = NULL;
	uint64_t *attrs;
	uint64_t *crtime;
	uint32_t *bitmap;
	void *scanstamp;
	int i;

	xvap->xva_vattr.va_mask |= AT_XVATTR;
	if ((xoap = xva_getxoptattr(xvap)) == NULL) {
		xvap->xva_vattr.va_mask &= ~AT_XVATTR; /* shouldn't happen */
		return;
	}

	ASSERT(lrattr->lr_attr_masksize == xvap->xva_mapsize);

	/* Copy the request bitmap, one 32-bit word at a time. */
	bitmap = &lrattr->lr_attr_bitmap;
	for (i = 0; i != lrattr->lr_attr_masksize; i++, bitmap++)
		xvap->xva_reqattrmap[i] = *bitmap;

	/*
	 * The 64-bit words start immediately after the last bitmap word,
	 * which is exactly where the loop above left "bitmap".  This is the
	 * same byte offset zfs_replay_swap_attrs() uses.  Stepping the
	 * lr_attr_t pointer by (masksize - 1) instead scales by the struct
	 * size rather than the word size, and only lands on the same
	 * address when the mask size is 3.
	 */
	attrs = (uint64_t *)bitmap;
	crtime = attrs + 1;
	scanstamp = (caddr_t)(crtime + 2);

	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN))
		xoap->xoa_hidden = ((*attrs & XAT0_HIDDEN) != 0);
	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM))
		xoap->xoa_system = ((*attrs & XAT0_SYSTEM) != 0);
	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE))
		xoap->xoa_archive = ((*attrs & XAT0_ARCHIVE) != 0);
	if (XVA_ISSET_REQ(xvap, XAT_READONLY))
		xoap->xoa_readonly = ((*attrs & XAT0_READONLY) != 0);
	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE))
		xoap->xoa_immutable = ((*attrs & XAT0_IMMUTABLE) != 0);
	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK))
		xoap->xoa_nounlink = ((*attrs & XAT0_NOUNLINK) != 0);
	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY))
		xoap->xoa_appendonly = ((*attrs & XAT0_APPENDONLY) != 0);
	if (XVA_ISSET_REQ(xvap, XAT_NODUMP))
		xoap->xoa_nodump = ((*attrs & XAT0_NODUMP) != 0);
	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE))
		xoap->xoa_opaque = ((*attrs & XAT0_OPAQUE) != 0);
	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED))
		xoap->xoa_av_modified = ((*attrs & XAT0_AV_MODIFIED) != 0);
	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED))
		xoap->xoa_av_quarantined =
		    ((*attrs & XAT0_AV_QUARANTINED) != 0);
	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
		ZFS_TIME_DECODE(&xoap->xoa_createtime, crtime);
	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
		bcopy(scanstamp, xoap->xoa_av_scanstamp, AV_SCANSTAMP_SZ);
}
/*
 * Count the distinct non-zero FUID domain indexes referenced by a
 * uid/gid pair: 0, 1 or 2.
 */
static int
zfs_replay_domain_cnt(uint64_t uid, uint64_t gid)
{
	uint64_t owner_idx = FUID_INDEX(uid);
	uint64_t group_idx = FUID_INDEX(gid);
	int count = (owner_idx != 0) ? 1 : 0;

	/* The group adds a domain only if it differs from the owner's. */
	if (group_idx != 0 && group_idx != owner_idx)
		count++;

	return (count);
}
/*
 * Point the first "domcnt" entries of the domain table at consecutive
 * NUL-terminated strings beginning at "start".  The strings are not
 * copied.  Returns the address just past the last string consumed.
 */
static void *
zfs_replay_fuid_domain_common(zfs_fuid_info_t *fuid_infop, void *start,
    int domcnt)
{
	int idx = 0;

	while (idx != domcnt) {
		fuid_infop->z_domain_table[idx] = start;
		/* Step over this string and its terminating NUL. */
		start = (caddr_t)start + strlen(start) + 1;
		idx++;
	}

	return (start);
}
/*
 * Set the uid/gid in the fuid_info structure.
 *
 * Only ephemeral ids are stored; a non-ephemeral uid or gid leaves the
 * corresponding field untouched.
 */
static void
zfs_replay_fuid_ugid(zfs_fuid_info_t *fuid_infop, uint64_t uid, uint64_t gid)
{
	if (IS_EPHEMERAL(uid)) {
		fuid_infop->z_fuid_owner = uid;
	}

	if (IS_EPHEMERAL(gid)) {
		fuid_infop->z_fuid_group = gid;
	}
}
/*
 * Load fuid domains into fuid_info_t
 *
 * Allocates a zfs_fuid_info_t and, when the uid/gid pair references at
 * least one domain, builds its domain table from the strings at "buf"
 * and stores the address past those strings in *end.  When no domain is
 * referenced, *end is left unmodified.
 */
static zfs_fuid_info_t *
zfs_replay_fuid_domain(void *buf, void **end, uint64_t uid, uint64_t gid)
{
	zfs_fuid_info_t *info = zfs_fuid_info_alloc();
	int ndomains = zfs_replay_domain_cnt(uid, gid);

	if (ndomains != 0) {
		info->z_domain_table =
		    kmem_zalloc(ndomains * sizeof (char **), KM_SLEEP);

		zfs_replay_fuid_ugid(info, uid, gid);

		info->z_domain_cnt = ndomains;
		*end = zfs_replay_fuid_domain_common(info, buf, ndomains);
	}

	return (info);
}
/*
 * load zfs_fuid_t's and fuid_domains into fuid_info_t
 *
 * "start" points at "idcnt" 64-bit logged FUIDs, followed by "domcnt"
 * NUL-terminated domain strings.  Each logged FUID becomes a zfs_fuid_t
 * appended to the z_fuids list with z_id set to -1 and z_domidx set
 * to 0.  The address past the domain strings is stored in *end.
 */
static zfs_fuid_info_t *
zfs_replay_fuids(void *start, void **end, int idcnt, int domcnt, uint64_t uid,
    uint64_t gid)
{
	uint64_t *logged_ids = (uint64_t *)start;
	zfs_fuid_info_t *info = zfs_fuid_info_alloc();
	int n;

	info->z_domain_cnt = domcnt;
	info->z_domain_table =
	    kmem_zalloc(domcnt * sizeof (char **), KM_SLEEP);

	for (n = 0; n != idcnt; n++) {
		zfs_fuid_t *entry = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP);

		entry->z_logfuid = logged_ids[n];
		entry->z_id = -1;
		entry->z_domidx = 0;
		list_insert_tail(&info->z_fuids, entry);
	}

	zfs_replay_fuid_ugid(info, uid, gid);

	/* The domain strings begin right after the last logged FUID. */
	*end = zfs_replay_fuid_domain_common(info, logged_ids + idcnt, domcnt);
	return (info);
}
/*
 * Byteswap the optional-attribute section of a log record in place.
 *
 * The lr_attr_t header must be swapped first, because the mask size
 * read from it determines how many further bitmap words there are and
 * where the three trailing 64-bit words begin.
 */
static void
zfs_replay_swap_attrs(lr_attr_t *lrattr)
{
	size_t extra_bitmap_bytes;
	caddr_t after_header;

	/* swap the lr_attr structure */
	byteswap_uint32_array(lrattr, sizeof (*lrattr));

	after_header = (caddr_t)(lrattr + 1);
	extra_bitmap_bytes =
	    (lrattr->lr_attr_masksize - 1) * sizeof (uint32_t);

	/* swap the bitmap */
	byteswap_uint32_array(lrattr + 1, extra_bitmap_bytes);

	/* swap the attributes, create time + 64 bit word for attributes */
	byteswap_uint64_array(after_header + extra_bitmap_bytes,
	    3 * sizeof (uint64_t));
}
/*
 * Replay file create with optional ACL, xvattr information as well
 * as option FUID information.
 *
 * Handles TX_CREATE_ACL, TX_CREATE_ACL_ATTR, TX_MKDIR_ACL and
 * TX_MKDIR_ACL_ATTR.  After the lr_acl_create_t header the record holds
 * the xvattr section (the _ATTR types only), the ACEs padded to
 * ZIL_ACE_LENGTH(lr_acl_bytes), the logged FUIDs and domain strings, and
 * finally the name of the new object.
 *
 * Returns 0 without creating anything if dmu_object_info() reports
 * anything other than ENOENT for the target object; ENOTSUP for an
 * unexpected transaction type; otherwise the result of the VOP call.
 */
static int
zfs_replay_create_acl(zfsvfs_t *zfsvfs,
    lr_acl_create_t *lracl, boolean_t byteswap)
{
	char *name = NULL;		/* location determined later */
	lr_create_t *lr = (lr_create_t *)lracl;
	znode_t *dzp;
	vnode_t *vp = NULL;
	xvattr_t xva;
	int vflg = 0;
	vsecattr_t vsec = { 0 };
	lr_attr_t *lrattr;
	void *aclstart;
	void *fuidstart;
	size_t xvatlen = 0;
	uint64_t txtype;
	int error;

	if (byteswap) {
		byteswap_uint64_array(lracl, sizeof (*lracl));
		txtype = (int)lr->lr_common.lrc_txtype;
		if (txtype == TX_CREATE_ACL_ATTR ||
		    txtype == TX_MKDIR_ACL_ATTR) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			zfs_replay_swap_attrs(lrattr);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		}

		aclstart = (caddr_t)(lracl + 1) + xvatlen;
		zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE);
		/* swap fuids */
		if (lracl->lr_fuidcnt) {
			byteswap_uint64_array((caddr_t)aclstart +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes),
			    lracl->lr_fuidcnt * sizeof (uint64_t));
		}
	}

	if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
		return (error);

	xva_init(&xva);
	zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID,
	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);

	/*
	 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
	 * eventually end up in zfs_mknode(), which assigns the object's
	 * creation time and generation number.  The generic VOP_CREATE()
	 * doesn't have either concept, so we smuggle the values inside
	 * the vattr's otherwise unused va_ctime and va_nblocks fields.
	 */
	ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
	xva.xva_vattr.va_nblocks = lr->lr_gen;

	/* Proceed only if the target object does not exist yet. */
	error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
	if (error != ENOENT)
		goto bail;

	if (lr->lr_common.lrc_txtype & TX_CI)
		vflg |= FIGNORECASE;
	switch ((int)lr->lr_common.lrc_txtype) {
	case TX_CREATE_ACL:
		aclstart = (caddr_t)(lracl + 1);
		fuidstart = (caddr_t)aclstart +
		    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
		    lr->lr_uid, lr->lr_gid);
		/*FALLTHROUGH*/
	case TX_CREATE_ACL_ATTR:
		/* name is still NULL only when entered as the _ATTR type. */
		if (name == NULL) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
			xva.xva_vattr.va_mask |= AT_XVATTR;
			zfs_replay_xvattr(lrattr, &xva);
		}
		vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
		vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
		vsec.vsa_aclcnt = lracl->lr_aclcnt;
		vsec.vsa_aclentsz = lracl->lr_acl_bytes;
		vsec.vsa_aclflags = lracl->lr_acl_flags;
		if (zfsvfs->z_fuid_replay == NULL) {
			fuidstart = (caddr_t)(lracl + 1) + xvatlen +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
			zfsvfs->z_fuid_replay =
			    zfs_replay_fuids(fuidstart,
			    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
			    lr->lr_uid, lr->lr_gid);
		}

		error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr,
		    0, 0, &vp, kcred, vflg, NULL, &vsec);
		break;
	case TX_MKDIR_ACL:
		aclstart = (caddr_t)(lracl + 1);
		fuidstart = (caddr_t)aclstart +
		    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
		    lr->lr_uid, lr->lr_gid);
		/*FALLTHROUGH*/
	case TX_MKDIR_ACL_ATTR:
		/* name is still NULL only when entered as the _ATTR type. */
		if (name == NULL) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
			zfs_replay_xvattr(lrattr, &xva);
		}
		vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
		vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
		vsec.vsa_aclcnt = lracl->lr_aclcnt;
		vsec.vsa_aclentsz = lracl->lr_acl_bytes;
		vsec.vsa_aclflags = lracl->lr_acl_flags;
		if (zfsvfs->z_fuid_replay == NULL) {
			fuidstart = (caddr_t)(lracl + 1) + xvatlen +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
			zfsvfs->z_fuid_replay =
			    zfs_replay_fuids(fuidstart,
			    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
			    lr->lr_uid, lr->lr_gid);
		}
		error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr,
		    &vp, kcred, NULL, vflg, &vsec);
		break;
	default:
		error = ENOTSUP;
	}

bail:
	if (error == 0 && vp != NULL)
		VN_RELE(vp);

	VN_RELE(ZTOV(dzp));

	/*
	 * The early "goto bail" and the default case get here without
	 * this function having set z_fuid_replay, so only free it when it
	 * is set -- the same guard zfs_replay_create() and
	 * zfs_replay_acl() use.
	 */
	if (zfsvfs->z_fuid_replay != NULL)
		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
	zfsvfs->z_fuid_replay = NULL;

	return (error);
}
/*
 * Replay TX_CREATE, TX_MKDIR, TX_MKXATTR, TX_SYMLINK and the
 * TX_CREATE_ATTR / TX_MKDIR_ATTR variants.
 *
 * After the lr_create_t header the record holds the xvattr section (the
 * _ATTR types only), then any FUID domain strings (never for
 * TX_SYMLINK), then the object name, and for TX_SYMLINK the link target
 * after the name.
 *
 * Returns 0 without creating anything if dmu_object_info() reports
 * anything other than ENOENT for the target object; ENOTSUP for an
 * unexpected transaction type; otherwise the result of the VOP call.
 */
static int
zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
{
	char *name = NULL;		/* location determined later */
	char *link;			/* symlink content follows name */
	znode_t *dzp;
	vnode_t *vp = NULL;
	xvattr_t xva;
	int vflg = 0;
	size_t lrsize = sizeof (lr_create_t);
	lr_attr_t *lrattr;
	void *start;
	size_t xvatlen;
	uint64_t txtype;
	int error;

	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));
		txtype = (int)lr->lr_common.lrc_txtype;
		if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR)
			zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
	}

	if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
		return (error);

	xva_init(&xva);
	zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID,
	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);

	/*
	 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
	 * eventually end up in zfs_mknode(), which assigns the object's
	 * creation time and generation number.  The generic VOP_CREATE()
	 * doesn't have either concept, so we smuggle the values inside
	 * the vattr's otherwise unused va_ctime and va_nblocks fields.
	 */
	ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
	xva.xva_vattr.va_nblocks = lr->lr_gen;

	/* Proceed only if the target object does not exist yet. */
	error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
	if (error != ENOENT)
		goto out;

	if (lr->lr_common.lrc_txtype & TX_CI)
		vflg |= FIGNORECASE;

	/*
	 * Symlinks don't have fuid info, and CIFS never creates
	 * symlinks.
	 *
	 * The _ATTR versions will grab the fuid info in their subcases.
	 *
	 * For the remaining types "start" is left pointing just past any
	 * domain strings, i.e. at the object name.
	 */
	if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK &&
	    (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR &&
	    (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) {
		start = (lr + 1);
		zfsvfs->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
	}

	switch ((int)lr->lr_common.lrc_txtype) {
	case TX_CREATE_ATTR:
		lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
		xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
		start = (caddr_t)(lr + 1) + xvatlen;
		zfsvfs->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
		name = (char *)start;
		/*FALLTHROUGH*/
	case TX_CREATE:
		if (name == NULL)
			name = (char *)start;

		error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr,
		    0, 0, &vp, kcred, vflg, NULL, NULL);
		break;
	case TX_MKDIR_ATTR:
		lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
		xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
		start = (caddr_t)(lr + 1) + xvatlen;
		zfsvfs->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
		name = (char *)start;
		/*FALLTHROUGH*/
	case TX_MKDIR:
		/*
		 * As for TX_CREATE, the name follows any FUID domain
		 * strings, so take it from "start" rather than from the
		 * end of the header.  With no domains the two are the
		 * same address.
		 */
		if (name == NULL)
			name = (char *)start;

		error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr,
		    &vp, kcred, NULL, vflg, NULL);
		break;
	case TX_MKXATTR:
		name = (char *)(lr + 1);
		error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &vp, kcred);
		break;
	case TX_SYMLINK:
		name = (char *)(lr + 1);
		link = name + strlen(name) + 1;
		error = VOP_SYMLINK(ZTOV(dzp), name, &xva.xva_vattr,
		    link, kcred, NULL, vflg);
		break;
	default:
		error = ENOTSUP;
	}

out:
	if (error == 0 && vp != NULL)
		VN_RELE(vp);

	VN_RELE(ZTOV(dzp));

	if (zfsvfs->z_fuid_replay)
		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
	zfsvfs->z_fuid_replay = NULL;
	return (error);
}
/*
 * Replay TX_REMOVE and TX_RMDIR.  The name to remove directly follows
 * the lr_remove_t header.  Any other transaction type yields ENOTSUP.
 */
static int
zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap)
{
	char *name = (char *)(lr + 1);	/* name follows lr_remove_t */
	znode_t *dzp;
	int vflg = 0;
	int txtype;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_doid, &dzp);
	if (error != 0)
		return (error);

	if (lr->lr_common.lrc_txtype & TX_CI)
		vflg |= FIGNORECASE;

	txtype = (int)lr->lr_common.lrc_txtype;
	if (txtype == TX_REMOVE)
		error = VOP_REMOVE(ZTOV(dzp), name, kcred, NULL, vflg);
	else if (txtype == TX_RMDIR)
		error = VOP_RMDIR(ZTOV(dzp), name, NULL, kcred, NULL, vflg);
	else
		error = ENOTSUP;

	VN_RELE(ZTOV(dzp));
	return (error);
}
/*
 * Replay TX_LINK.  The new link name directly follows the lr_link_t
 * header.  Both the directory and the link target are looked up first;
 * a failure of either lookup is returned as-is.
 */
static int
zfs_replay_link(zfsvfs_t *zfsvfs, lr_link_t *lr, boolean_t byteswap)
{
	char *name = (char *)(lr + 1);	/* name follows lr_link_t */
	znode_t *dzp;
	znode_t *zp;
	int vflg = 0;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_doid, &dzp);
	if (error != 0)
		return (error);

	error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp);
	if (error == 0) {
		if (lr->lr_common.lrc_txtype & TX_CI)
			vflg |= FIGNORECASE;

		error = VOP_LINK(ZTOV(dzp), ZTOV(zp), name, kcred, NULL, vflg);
		VN_RELE(ZTOV(zp));
	}

	/* The directory hold is dropped on every path past the first zget. */
	VN_RELE(ZTOV(dzp));
	return (error);
}
/*
 * Replay TX_RENAME.  The source name directly follows the lr_rename_t
 * header and the target name directly follows the source name's NUL.
 * A failure to look up either directory is returned as-is.
 */
static int
zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap)
{
	char *sname = (char *)(lr + 1);	/* sname and tname follow lr_rename_t */
	char *tname = sname + strlen(sname) + 1;
	znode_t *sdzp;
	znode_t *tdzp;
	int vflg = 0;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp);
	if (error != 0)
		return (error);

	error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp);
	if (error == 0) {
		if (lr->lr_common.lrc_txtype & TX_CI)
			vflg |= FIGNORECASE;

		error = VOP_RENAME(ZTOV(sdzp), sname, ZTOV(tdzp), tname,
		    kcred, NULL, vflg);
		VN_RELE(ZTOV(tdzp));
	}

	VN_RELE(ZTOV(sdzp));
	return (error);
}
/*
 * Replay TX_WRITE by writing the lr_length bytes that follow the
 * lr_write_t header to the file at lr_offset.  A missing file (ENOENT)
 * is treated as success.
 */
static int
zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
{
	char *data = (char *)(lr + 1);	/* data follows lr_write_t */
	ssize_t resid;
	znode_t *zp;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error != 0) {
		/*
		 * As we can log writes out of order, it's possible the
		 * file has been removed. In this case just drop the write
		 * and return success.
		 */
		return ((error == ENOENT) ? 0 : error);
	}

	error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, lr->lr_length,
	    lr->lr_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);

	VN_RELE(ZTOV(zp));
	return (error);
}
/*
 * Replay TX_TRUNCATE by issuing VOP_SPACE(F_FREESP) for the logged
 * offset and length.  A missing file (ENOENT) is treated as success.
 */
static int
zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap)
{
	flock64_t fl;
	znode_t *zp;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error != 0) {
		/*
		 * As we can log truncates out of order, it's possible the
		 * file has been removed. In this case just drop the truncate
		 * and return success.
		 */
		return ((error == ENOENT) ? 0 : error);
	}

	/* Describe the region to free, measured from the file's start. */
	bzero(&fl, sizeof (fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = 0;
	fl.l_start = lr->lr_offset;
	fl.l_len = lr->lr_length;

	error = VOP_SPACE(ZTOV(zp), F_FREESP, &fl, FWRITE | FOFFMAX,
	    lr->lr_offset, kcred, NULL);

	VN_RELE(ZTOV(zp));
	return (error);
}
/*
 * Replay TX_SETATTR.  After the lr_setattr_t header the record holds the
 * xvattr section (when AT_XVATTR is set in lr_mask) followed by any FUID
 * domain strings.  A missing file (ENOENT) is treated as success.
 */
static int
zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap)
{
	xvattr_t xva;
	vattr_t *vap = &xva.xva_vattr;
	void *cursor;
	znode_t *zp;
	int error;

	xva_init(&xva);

	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));

		if ((lr->lr_mask & AT_XVATTR) &&
		    zfsvfs->z_version >= ZPL_VERSION_INITIAL)
			zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
	}

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error != 0) {
		/*
		 * As we can log setattrs out of order, it's possible the
		 * file has been removed. In this case just drop the setattr
		 * and return success.
		 */
		return ((error == ENOENT) ? 0 : error);
	}

	zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode,
	    lr->lr_uid, lr->lr_gid, 0, lr->lr_foid);

	vap->va_size = lr->lr_size;
	ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime);
	ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime);

	/*
	 * Fill in xvattr_t portions if necessary.
	 */
	cursor = (lr_setattr_t *)(lr + 1);
	if (vap->va_mask & AT_XVATTR) {
		lr_attr_t *lrattr = (lr_attr_t *)cursor;

		zfs_replay_xvattr(lrattr, &xva);
		cursor = (caddr_t)cursor +
		    ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
	} else {
		xva.xva_vattr.va_mask &= ~AT_XVATTR;
	}

	/* Domain strings, if any, follow the xvattr section. */
	zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(cursor, &cursor,
	    lr->lr_uid, lr->lr_gid);

	error = VOP_SETATTR(ZTOV(zp), vap, 0, kcred, NULL);

	zfs_fuid_info_free(zfsvfs->z_fuid_replay);
	zfsvfs->z_fuid_replay = NULL;
	VN_RELE(ZTOV(zp));

	return (error);
}
/*
 * Replay TX_ACL_V0.  The ACE array directly follows the lr_acl_v0_t
 * header and is applied with VOP_SETSECATTR().  A missing file (ENOENT)
 * is treated as success.
 */
static int
zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap)
{
	ace_t *ace = (ace_t *)(lr + 1);	/* ace array follows lr_acl_t */
	vsecattr_t vsa;
	znode_t *zp;
	int error;

	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));
		zfs_oldace_byteswap(ace, lr->lr_aclcnt);
	}

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error != 0) {
		/*
		 * As we can log acls out of order, it's possible the
		 * file has been removed. In this case just drop the acl
		 * and return success.
		 */
		return ((error == ENOENT) ? 0 : error);
	}

	bzero(&vsa, sizeof (vsa));
	vsa.vsa_mask = VSA_ACE | VSA_ACECNT;
	vsa.vsa_aclcnt = lr->lr_aclcnt;
	vsa.vsa_aclentp = ace;

	error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL);

	VN_RELE(ZTOV(zp));
	return (error);
}
/*
 * Replaying ACLs is complicated by FUID support.
 * The log record may contain some optional data
 * to be used for replaying FUID's.  These pieces
 * are the actual FUIDs that were created initially.
 * The FUID table index may no longer be valid and
 * during zfs_create() a new index may be assigned.
 * Because of this the log will contain the original
 * domain+rid in order to create a new FUID.
 *
 * The individual ACEs may contain an ephemeral uid/gid which is no
 * longer valid and will need to be replaced with an actual FUID.
 *
 * Record layout after the lr_acl_t header: the ACEs, padded to
 * ZIL_ACE_LENGTH(lr_acl_bytes), then lr_fuidcnt 64-bit FUIDs and the
 * domain strings.  A missing file (ENOENT) is treated as success.
 */
static int
zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap)
{
	ace_t *ace = (ace_t *)(lr + 1);
	vsecattr_t vsa;
	znode_t *zp;
	int error;

	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));
		zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE);
		if (lr->lr_fuidcnt) {
			byteswap_uint64_array((caddr_t)ace +
			    ZIL_ACE_LENGTH(lr->lr_acl_bytes),
			    lr->lr_fuidcnt * sizeof (uint64_t));
		}
	}

	error = zfs_zget(zfsvfs, lr->lr_foid, &zp);
	if (error != 0) {
		/*
		 * As we can log acls out of order, it's possible the
		 * file has been removed. In this case just drop the acl
		 * and return success.
		 */
		return ((error == ENOENT) ? 0 : error);
	}

	bzero(&vsa, sizeof (vsa));
	vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS;
	vsa.vsa_aclcnt = lr->lr_aclcnt;
	vsa.vsa_aclentp = ace;
	vsa.vsa_aclentsz = lr->lr_acl_bytes;
	vsa.vsa_aclflags = lr->lr_acl_flags;

	/* FUID replay state is only built when the record logged FUIDs. */
	if (lr->lr_fuidcnt) {
		void *fuid_area = (caddr_t)ace +
		    ZIL_ACE_LENGTH(lr->lr_acl_bytes);

		zfsvfs->z_fuid_replay =
		    zfs_replay_fuids(fuid_area, &fuid_area,
		    lr->lr_fuidcnt, lr->lr_domcnt, 0, 0);
	}

	error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL);

	if (zfsvfs->z_fuid_replay)
		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
	zfsvfs->z_fuid_replay = NULL;

	VN_RELE(ZTOV(zp));
	return (error);
}
/*
 * Callback vectors for replaying records
 *
 * The table is indexed by transaction type, so the position of each
 * entry is significant: entry N must be the handler for the transaction
 * type named in its trailing comment.  Slot 0 is not a valid transaction
 * type and maps to zfs_replay_error().
 */
zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = {
zfs_replay_error, /* 0 no such transaction type */
zfs_replay_create, /* TX_CREATE */
zfs_replay_create, /* TX_MKDIR */
zfs_replay_create, /* TX_MKXATTR */
zfs_replay_create, /* TX_SYMLINK */
zfs_replay_remove, /* TX_REMOVE */
zfs_replay_remove, /* TX_RMDIR */
zfs_replay_link, /* TX_LINK */
zfs_replay_rename, /* TX_RENAME */
zfs_replay_write, /* TX_WRITE */
zfs_replay_truncate, /* TX_TRUNCATE */
zfs_replay_setattr, /* TX_SETATTR */
zfs_replay_acl_v0, /* TX_ACL_V0 */
zfs_replay_acl, /* TX_ACL */
zfs_replay_create_acl, /* TX_CREATE_ACL */
zfs_replay_create, /* TX_CREATE_ATTR */
zfs_replay_create_acl, /* TX_CREATE_ACL_ATTR */
zfs_replay_create_acl, /* TX_MKDIR_ACL */
zfs_replay_create, /* TX_MKDIR_ATTR */
zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
};
+602
View File
@@ -0,0 +1,602 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "@(#)zfs_rlock.c 1.4 07/08/08 SMI"
/*
* This file contains the code to implement file range locking in
* ZFS, although there isn't much specific to ZFS (all that comes to mind is
* support for growing the blocksize).
*
* Interface
* ---------
* Defined in zfs_rlock.h but essentially:
* rl = zfs_range_lock(zp, off, len, lock_type);
* zfs_range_unlock(rl);
* zfs_range_reduce(rl, off, len);
*
* AVL tree
* --------
* An AVL tree is used to maintain the state of the existing ranges
* that are locked for exclusive (writer) or shared (reader) use.
* The starting range offset is used for searching and sorting the tree.
*
* Common case
* -----------
* The (hopefully) usual case is of no overlaps or contention for
* locks. On entry to zfs_range_lock() a rl_t is allocated; the tree is
* searched and, finding no overlap, *this* rl_t is placed in the tree.
*
* Overlaps/Reference counting/Proxy locks
* ---------------------------------------
* The avl code only allows one node at a particular offset. Also it's very
* inefficient to search through all previous entries looking for overlaps
* (because the very 1st in the ordered list might be at offset 0 but
* cover the whole file).
* So this implementation uses reference counts and proxy range locks.
* Firstly, only reader locks use reference counts and proxy locks,
* because writer locks are exclusive.
* When a reader lock overlaps with another then a proxy lock is created
* for that range and replaces the original lock. If the overlap
* is exact then the reference count of the proxy is simply incremented.
* Otherwise, the proxy lock is split into smaller lock ranges and
* new proxy locks created for non overlapping ranges.
* The reference counts are adjusted accordingly.
* Meanwhile, the original lock is kept around (this is the caller's handle)
* and its offset and length are used when releasing the lock.
*
* Thread coordination
* -------------------
* In order to make wakeups efficient and to ensure multiple continuous
* readers on a range don't starve a writer for the same range lock,
* two condition variables are allocated in each rl_t.
* If a writer (or reader) can't get a range it initialises the writer
* (or reader) cv; sets a flag saying there's a writer (or reader) waiting;
* and waits on that cv. When a thread unlocks that range it wakes up all
* writers then all readers before destroying the lock.
*
* Append mode writes
* ------------------
* Append mode writes need to lock a range at the end of a file.
* The offset of the end of the file is determined under the
* range locking mutex, and the lock type converted from RL_APPEND to
* RL_WRITER and the range locked.
*
* Grow block handling
* -------------------
* ZFS supports multiple block sizes currently up to 128K. The smallest
* block size is used for the file which is grown as needed. During this
* growth all other writers and readers must be excluded.
* So if the block size needs to be grown then the whole file is
* exclusively locked, then later the caller will reduce the lock
* range to just the range to be written using zfs_range_reduce().
*/
#include <sys/zfs_rlock.h>
/*
* Check if a write lock can be grabbed, or wait and recheck until available.
*
* zp: znode whose range-lock tree (z_range_avl) is searched.
* new: the lock to insert; r_type may be RL_WRITER or RL_APPEND on entry and
* is set to RL_WRITER at the point the lock is inserted into the tree.
*
* This function sleeps with cv_wait() on zp->z_range_lock, so that mutex
* must already be held (zfs_range_lock() enters it before calling here).
* It only returns once 'new' is in the tree.
*/
static void
zfs_range_lock_writer(znode_t *zp, rl_t *new)
{
avl_tree_t *tree = &zp->z_range_avl;
rl_t *rl;
avl_index_t where;
uint64_t end_size;
/*
* Remember the range as requested: each pass of the loop may rewrite
* new->r_off/r_len (append, block grow), and they are restored after
* every wait so the next pass starts from the caller's values.
*/
uint64_t off = new->r_off;
uint64_t len = new->r_len;
for (;;) {
/*
* Range locking is also used by zvol and uses a
* dummied up znode. However, for zvol, we don't need to
* append or grow blocksize, and besides we don't have
* a z_phys or z_zfsvfs - so skip that processing.
*
* Yes, this is ugly, and would be solved by not handling
* grow or append in range lock code. If that was done then
* we could make the range locking code generically available
* to other non-zfs consumers.
*/
if (zp->z_vnode) { /* caller is ZPL */
/*
* If in append mode pick up the current end of file.
* This is done under z_range_lock to avoid races.
*/
if (new->r_type == RL_APPEND)
new->r_off = zp->z_phys->zp_size;
/*
* If we need to grow the block size then grab the whole
* file range. This is also done under z_range_lock to
* avoid races.
*/
end_size = MAX(zp->z_phys->zp_size, new->r_off + len);
if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
new->r_off = 0;
new->r_len = UINT64_MAX;
}
}
/*
* First check for the usual case of no locks
*/
if (avl_numnodes(tree) == 0) {
new->r_type = RL_WRITER; /* convert to writer */
avl_add(tree, new);
return;
}
/*
* Look for any locks in the range.
*/
rl = avl_find(tree, new, &where);
if (rl)
goto wait; /* already locked at same offset */
/* the following lock overlaps if it starts before our range ends */
rl = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
if (rl && (rl->r_off < new->r_off + new->r_len))
goto wait;
/* the preceding lock overlaps if it extends past our start */
rl = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);
if (rl && rl->r_off + rl->r_len > new->r_off)
goto wait;
new->r_type = RL_WRITER; /* convert possible RL_APPEND */
avl_insert(tree, new, where);
return;
wait:
/*
* rl is the conflicting lock. The first writer to wait on it
* initialises its writer cv and sets the flag; the unlock paths
* in this file broadcast on and destroy the cv when the flag
* is set.
*/
if (!rl->r_write_wanted) {
cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL);
rl->r_write_wanted = B_TRUE;
}
cv_wait(&rl->r_wr_cv, &zp->z_range_lock);
/* reset to original */
new->r_off = off;
new->r_len = len;
}
}
/*
 * Make sure a lock is represented in the tree by a proxy.
 *
 * A lock that already is a proxy is handed straight back.  An original
 * (caller-owned) lock is taken out of the tree with its reference count
 * zeroed, and a newly allocated reader proxy covering the same range is
 * added in its place.  Returns the proxy that now stands for the range.
 */
static rl_t *
zfs_range_proxify(avl_tree_t *tree, rl_t *rl)
{
	rl_t *px;

	/* nothing to do when the entry is a proxy already */
	if (rl->r_proxy)
		return (rl);

	ASSERT3U(rl->r_cnt, ==, 1);
	ASSERT(rl->r_write_wanted == B_FALSE);
	ASSERT(rl->r_read_wanted == B_FALSE);

	/* the caller's lock leaves the tree; the proxy carries the count */
	avl_remove(tree, rl);
	rl->r_cnt = 0;

	px = kmem_alloc(sizeof (rl_t), KM_SLEEP);
	px->r_proxy = B_TRUE;
	px->r_type = RL_READER;
	px->r_cnt = 1;
	px->r_off = rl->r_off;
	px->r_len = rl->r_len;
	px->r_read_wanted = B_FALSE;
	px->r_write_wanted = B_FALSE;
	avl_add(tree, px);

	return (px);
}
/*
 * Cut a range lock in two at 'off', which must lie strictly inside it.
 *
 * The part from 'off' onwards becomes a new proxy that inherits the
 * current reference count; the part before 'off' is proxified if it
 * isn't a proxy yet and shortened.  The *front* proxy is returned, and
 * the rear one sits immediately after it in the tree.
 */
static rl_t *
zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off)
{
	rl_t *head, *tail;

	ASSERT3U(rl->r_len, >, 1);
	ASSERT3U(off, >, rl->r_off);
	ASSERT3U(off, <, rl->r_off + rl->r_len);
	ASSERT(rl->r_write_wanted == B_FALSE);
	ASSERT(rl->r_read_wanted == B_FALSE);

	/*
	 * Build the rear piece first: proxifying an original lock zeroes
	 * rl->r_cnt, so the count has to be sampled before that happens.
	 */
	tail = kmem_alloc(sizeof (rl_t), KM_SLEEP);
	tail->r_proxy = B_TRUE;
	tail->r_type = RL_READER;
	tail->r_cnt = rl->r_cnt;
	tail->r_off = off;
	tail->r_len = rl->r_off + rl->r_len - off;
	tail->r_read_wanted = B_FALSE;
	tail->r_write_wanted = B_FALSE;

	/* the front piece keeps the original offset but stops at 'off' */
	head = zfs_range_proxify(tree, rl);
	head->r_len = off - rl->r_off;

	avl_insert_here(tree, tail, head, AVL_AFTER);
	return (head);
}
/*
 * Allocate a reader proxy for [off, off + len) with a reference count
 * of one and add it to the tree.  'len' must be non-zero.
 */
static void
zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
{
	rl_t *proxy;

	ASSERT(len);

	proxy = kmem_alloc(sizeof (rl_t), KM_SLEEP);
	proxy->r_proxy = B_TRUE;
	proxy->r_type = RL_READER;
	proxy->r_cnt = 1;
	proxy->r_off = off;
	proxy->r_len = len;
	proxy->r_read_wanted = B_FALSE;
	proxy->r_write_wanted = B_FALSE;

	avl_add(tree, proxy);
}
/*
* Add the reader lock 'new' to the tree, sharing ranges with any entries
* it overlaps.
*
* tree: the range-lock AVL tree.
* new: the caller's reader lock (r_off/r_len give the wanted range).
* prev: candidate overlapping predecessor, see the comment in the body.
* where: insertion point obtained by the caller's avl_find() for 'new'.
*
* If nothing overlaps, 'new' itself is inserted at 'where'. Otherwise
* new->r_cnt is set to 0, 'new' stays out of the tree, and the range is
* covered by proxies: overlapped entries are proxified or split and have
* their r_cnt bumped, and uncovered gaps get fresh proxies.
*
* NOTE(review): nothing here checks for writers; the visible caller
* (zfs_range_lock_reader) does that before calling.
*/
static void
zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t *prev, avl_index_t where)
{
rl_t *next;
uint64_t off = new->r_off;
uint64_t len = new->r_len;
/*
* prev arrives either:
* - pointing to an entry at the same offset
* - pointing to the entry with the closest previous offset whose
* range may overlap with the new range
* - null, if there were no ranges starting before the new one
*/
if (prev) {
if (prev->r_off + prev->r_len <= off) {
/* prev ends at or before our start: no overlap with it */
prev = NULL;
} else if (prev->r_off != off) {
/*
* convert to proxy if needed then
* split this entry and bump ref count
*/
prev = zfs_range_split(tree, prev, off);
prev = AVL_NEXT(tree, prev); /* move to rear range */
}
}
ASSERT((prev == NULL) || (prev->r_off == off));
/* 'next' is the first entry that could overlap the new range */
if (prev)
next = prev;
else
next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
if (next == NULL || off + len <= next->r_off) {
/* no overlaps, use the original new rl_t in the tree */
avl_insert(tree, new, where);
return;
}
if (off < next->r_off) {
/* Add a proxy for initial range before the overlap */
zfs_range_new_proxy(tree, off, next->r_off - off);
}
new->r_cnt = 0; /* will use proxies in tree */
/*
* We now search forward through the ranges, until we go past the end
* of the new range. For each entry we make it a proxy if it
* isn't already, then bump its reference count. If there's any
* gaps between the ranges then we create a new proxy range.
*/
for (prev = NULL; next; prev = next, next = AVL_NEXT(tree, next)) {
if (off + len <= next->r_off)
break;
if (prev && prev->r_off + prev->r_len < next->r_off) {
/* there's a gap */
ASSERT3U(next->r_off, >, prev->r_off + prev->r_len);
zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
next->r_off - (prev->r_off + prev->r_len));
}
if (off + len == next->r_off + next->r_len) {
/* exact overlap with end */
next = zfs_range_proxify(tree, next);
next->r_cnt++;
return;
}
if (off + len < next->r_off + next->r_len) {
/* new range ends in the middle of this block */
next = zfs_range_split(tree, next, off + len);
next->r_cnt++;
return;
}
/* new range extends beyond this entry: share it and keep going */
ASSERT3U(off + len, >, next->r_off + next->r_len);
next = zfs_range_proxify(tree, next);
next->r_cnt++;
}
/*
* Add the remaining end range. prev is the last entry processed by
* the loop above; the new range runs past its end.
*/
zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
(off + len) - (prev->r_off + prev->r_len));
}
/*
* Check if a reader lock can be grabbed, or wait and recheck until available.
*
* zp: znode whose range-lock tree is searched.
* new: the caller's reader lock describing the wanted range.
*
* Any overlapping entry that is a writer, or that has a writer waiting on
* it (r_write_wanted), makes the reader wait on that entry's reader cv and
* then restart the whole search. Once no such entry overlaps, the lock is
* added via zfs_range_add_reader().
*
* Sleeps with cv_wait() on zp->z_range_lock, so that mutex must be held on
* entry (zfs_range_lock() enters it before calling here).
*/
static void
zfs_range_lock_reader(znode_t *zp, rl_t *new)
{
avl_tree_t *tree = &zp->z_range_avl;
rl_t *prev, *next;
avl_index_t where;
uint64_t off = new->r_off;
uint64_t len = new->r_len;
/*
* Look for any writer locks in the range.
*/
retry:
/* prev: entry at the same offset, else the closest one starting before */
prev = avl_find(tree, new, &where);
if (prev == NULL)
prev = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);
/*
* Check the previous range for a writer lock overlap.
*/
if (prev && (off < prev->r_off + prev->r_len)) {
if ((prev->r_type == RL_WRITER) || (prev->r_write_wanted)) {
/* first waiting reader initialises the cv and sets the flag */
if (!prev->r_read_wanted) {
cv_init(&prev->r_rd_cv, NULL, CV_DEFAULT, NULL);
prev->r_read_wanted = B_TRUE;
}
cv_wait(&prev->r_rd_cv, &zp->z_range_lock);
goto retry;
}
/* prev is a reader lock that extends past our end: nothing more to check */
if (off + len < prev->r_off + prev->r_len)
goto got_lock;
}
/*
* Search through the following ranges to see if there's
* any write lock overlap.
*/
if (prev)
next = AVL_NEXT(tree, prev);
else
next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
for (; next; next = AVL_NEXT(tree, next)) {
/* this entry starts at or after our end: no further overlaps */
if (off + len <= next->r_off)
goto got_lock;
if ((next->r_type == RL_WRITER) || (next->r_write_wanted)) {
if (!next->r_read_wanted) {
cv_init(&next->r_rd_cv, NULL, CV_DEFAULT, NULL);
next->r_read_wanted = B_TRUE;
}
cv_wait(&next->r_rd_cv, &zp->z_range_lock);
goto retry;
}
/* our range ends within this reader entry */
if (off + len <= next->r_off + next->r_len)
goto got_lock;
}
got_lock:
/*
* Add the read lock, which may involve splitting existing
* locks and bumping ref counts (r_cnt).
*/
zfs_range_add_reader(tree, new, prev, where);
}
/*
 * Acquire a range lock on (off, len) of the given znode.
 *
 * 'type' is RL_READER for a shared lock, RL_WRITER for an exclusive one,
 * or RL_APPEND, which is handled by the writer path.  The call returns
 * once the lock has been obtained; the returned rl_t is the caller's
 * handle for zfs_range_unlock() and, for a whole-file writer lock,
 * zfs_range_reduce().
 */
rl_t *
zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
{
	rl_t *lock;

	ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);

	lock = kmem_alloc(sizeof (rl_t), KM_SLEEP);
	lock->r_zp = zp;
	lock->r_type = type;
	lock->r_off = off;
	lock->r_len = len;
	lock->r_cnt = 1;	/* assume it's going to be in the tree */
	lock->r_proxy = B_FALSE;
	lock->r_read_wanted = B_FALSE;
	lock->r_write_wanted = B_FALSE;

	mutex_enter(&zp->z_range_lock);
	if (type != RL_READER) {
		/* RL_WRITER or RL_APPEND */
		zfs_range_lock_writer(zp, lock);
	} else if (avl_numnodes(&zp->z_range_avl) == 0) {
		/* usual case: no locks at all, so just insert */
		avl_add(&zp->z_range_avl, lock);
	} else {
		zfs_range_lock_reader(zp, lock);
	}
	mutex_exit(&zp->z_range_lock);

	return (lock);
}
/*
 * Unlock a reader lock.
 *
 * zp:     znode owning the range-lock tree.
 * remove: the caller's reader lock handle; it is freed here.
 *
 * Called from zfs_range_unlock() with zp->z_range_lock held.  Any waiters
 * recorded on an entry that leaves the tree are woken (writers first, then
 * readers) and the corresponding condition variables destroyed.
 */
static void
zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
{
	avl_tree_t *tree = &zp->z_range_avl;
	/*
	 * 'next' starts out NULL: when the first proxy already covers the
	 * whole range, the loop's "rl = next" step runs without 'next' ever
	 * having been assigned, and copying an uninitialised automatic
	 * variable is undefined behaviour (and a compiler warning).
	 */
	rl_t *rl, *next = NULL;
	uint64_t len;

	/*
	 * The common case is when the remove entry is in the tree
	 * (cnt == 1) meaning there's been no other reader locks overlapping
	 * with this one. Otherwise the remove entry will have been
	 * removed from the tree and replaced by proxies (one or
	 * more ranges mapping to the entire range).
	 */
	if (remove->r_cnt == 1) {
		avl_remove(tree, remove);
		if (remove->r_write_wanted) {
			cv_broadcast(&remove->r_wr_cv);
			cv_destroy(&remove->r_wr_cv);
		}
		if (remove->r_read_wanted) {
			cv_broadcast(&remove->r_rd_cv);
			cv_destroy(&remove->r_rd_cv);
		}
	} else {
		ASSERT3U(remove->r_cnt, ==, 0);
		ASSERT3U(remove->r_write_wanted, ==, 0);
		ASSERT3U(remove->r_read_wanted, ==, 0);
		/*
		 * Find start proxy representing this reader lock,
		 * then decrement ref count on all proxies
		 * that make up this range, freeing them as needed.
		 */
		rl = avl_find(tree, remove, NULL);
		ASSERT(rl);
		ASSERT(rl->r_cnt);
		ASSERT(rl->r_type == RL_READER);
		for (len = remove->r_len; len != 0; rl = next) {
			len -= rl->r_len;
			if (len) {
				/*
				 * More of the range remains: fetch the
				 * successor now, because rl may be freed
				 * below.
				 */
				next = AVL_NEXT(tree, rl);
				ASSERT(next);
				ASSERT(rl->r_off + rl->r_len == next->r_off);
				ASSERT(next->r_cnt);
				ASSERT(next->r_type == RL_READER);
			}
			rl->r_cnt--;
			if (rl->r_cnt == 0) {
				/* last reference: retire this proxy */
				avl_remove(tree, rl);
				if (rl->r_write_wanted) {
					cv_broadcast(&rl->r_wr_cv);
					cv_destroy(&rl->r_wr_cv);
				}
				if (rl->r_read_wanted) {
					cv_broadcast(&rl->r_rd_cv);
					cv_destroy(&rl->r_rd_cv);
				}
				kmem_free(rl, sizeof (rl_t));
			}
		}
	}
	kmem_free(remove, sizeof (rl_t));
}
/*
 * Release a range lock obtained from zfs_range_lock() and free its
 * rl_t.  The handle must be the caller's original lock, never a proxy.
 */
void
zfs_range_unlock(rl_t *rl)
{
	znode_t *zp = rl->r_zp;

	ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER);
	ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0);
	ASSERT(!rl->r_proxy);

	mutex_enter(&zp->z_range_lock);

	if (rl->r_type != RL_WRITER) {
		/*
		 * Reader locks may be shared through proxies, so
		 * zfs_range_unlock_reader() releases the lock and
		 * frees the rl_t.
		 */
		zfs_range_unlock_reader(zp, rl);
		mutex_exit(&zp->z_range_lock);
		return;
	}

	/* writer locks can't be shared or split */
	avl_remove(&zp->z_range_avl, rl);
	mutex_exit(&zp->z_range_lock);

	if (rl->r_write_wanted) {
		cv_broadcast(&rl->r_wr_cv);
		cv_destroy(&rl->r_wr_cv);
	}
	if (rl->r_read_wanted) {
		cv_broadcast(&rl->r_rd_cv);
		cv_destroy(&rl->r_rd_cv);
	}
	kmem_free(rl, sizeof (rl_t));
}
/*
 * Shrink a whole-file RL_WRITER lock down to the range (off, len).
 *
 * The lock must be the exclusive whole-file lock (offset 0, length
 * UINT64_MAX), which the assertions check along with it being the only
 * entry in the tree.  After the range has been updated, any writers and
 * readers waiting on this lock are woken so they can recheck.
 */
void
zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len)
{
	znode_t *zp = rl->r_zp;

	/* Ensure there are no other locks */
	ASSERT(avl_numnodes(&zp->z_range_avl) == 1);
	ASSERT(rl->r_off == 0);
	ASSERT(rl->r_type == RL_WRITER);
	ASSERT(!rl->r_proxy);
	ASSERT3U(rl->r_len, ==, UINT64_MAX);
	ASSERT3U(rl->r_cnt, ==, 1);

	/* publish the narrower range under the range-lock mutex */
	mutex_enter(&zp->z_range_lock);
	rl->r_off = off;
	rl->r_len = len;
	mutex_exit(&zp->z_range_lock);

	/* the cvs stay alive here; only the unlock path destroys them */
	if (rl->r_write_wanted) {
		cv_broadcast(&rl->r_wr_cv);
	}
	if (rl->r_read_wanted) {
		cv_broadcast(&rl->r_rd_cv);
	}
}
/*
 * AVL comparison callback for range locks.
 *
 * Both arguments point to rl_t entries, which are ordered purely by the
 * start offset of their range.  Returns -1, 0 or 1 as the first offset
 * is below, equal to or above the second.
 */
int
zfs_range_compare(const void *arg1, const void *arg2)
{
	const rl_t *lhs = arg1;
	const rl_t *rhs = arg2;

	if (lhs->r_off == rhs->r_off)
		return (0);

	return (lhs->r_off < rhs->r_off ? -1 : 1);
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+34
View File
@@ -0,0 +1,34 @@
subdir-m += include
DISTFILES = libnvpair.c nvpair.c nvpair_alloc_fixed.c nvpair_alloc_system.c
MODULE := znvpair
LIBRARY := libnvpair
# Compile as kernel module. Needed symlinks created for all
# k* objects created by top level configure script.
EXTRA_CFLAGS = @KERNELCPPFLAGS@
EXTRA_CFLAGS += -I@LIBDIR@/libnvpair/include
obj-m := ${MODULE}.o
${MODULE}-objs += knvpair.o # Interfaces name/value pairs
${MODULE}-objs += nvpair_alloc_spl.o # Generic alloc/free support
# Compile as shared library. There's an extra useless host program
# here called 'zu' because it was the easiest way I could convince
# the kernel build system to construct a user space shared library.
HOSTCFLAGS += @HOSTCFLAGS@
HOSTCFLAGS += -I@LIBDIR@/libsolcompat/include
HOSTCFLAGS += -I@LIBDIR@/libport/include
HOSTCFLAGS += -I@LIBDIR@/libnvpair/include
hostprogs-y := zu
always := $(hostprogs-y)
zu-objs := zu.o ${LIBRARY}.so
${LIBRARY}-objs += unvpair.o
${LIBRARY}-objs += nvpair_alloc_system.o
${LIBRARY}-objs += libnvpair.o
+2
View File
@@ -0,0 +1,2 @@
subdir-m += sys
DISTFILES = libnvpair.h
+46
View File
@@ -0,0 +1,46 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _LIBNVPAIR_H
#define _LIBNVPAIR_H
#include <sys/nvpair.h>
#include <stdlib.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
void nvlist_print(FILE *, nvlist_t *);
#ifdef __cplusplus
}
#endif
#endif /* _LIBNVPAIR_H */
@@ -0,0 +1 @@
DISTFILES = nvpair.h nvpair_impl.h
+262
View File
@@ -0,0 +1,262 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_NVPAIR_H
#define _SYS_NVPAIR_H
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/va_list.h>
#if defined(_KERNEL) && !defined(_BOOT)
#include <sys/kmem.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
* Value types an nvpair can hold; this is the type of nvpair_t.nvp_type.
* Only DATA_TYPE_UNKNOWN has an explicit value (0); every other enumerator
* takes the next integer in declaration order, so inserting or reordering
* entries renumbers all that follow.
* NOTE(review): presumably these numbers appear in packed nvlists, in which
* case new types may only be appended - confirm against the pack code.
*/
typedef enum {
DATA_TYPE_UNKNOWN = 0,
DATA_TYPE_BOOLEAN,
DATA_TYPE_BYTE,
DATA_TYPE_INT16,
DATA_TYPE_UINT16,
DATA_TYPE_INT32,
DATA_TYPE_UINT32,
DATA_TYPE_INT64,
DATA_TYPE_UINT64,
DATA_TYPE_STRING,
DATA_TYPE_BYTE_ARRAY,
DATA_TYPE_INT16_ARRAY,
DATA_TYPE_UINT16_ARRAY,
DATA_TYPE_INT32_ARRAY,
DATA_TYPE_UINT32_ARRAY,
DATA_TYPE_INT64_ARRAY,
DATA_TYPE_UINT64_ARRAY,
DATA_TYPE_STRING_ARRAY,
DATA_TYPE_HRTIME,
DATA_TYPE_NVLIST,
DATA_TYPE_NVLIST_ARRAY,
DATA_TYPE_BOOLEAN_VALUE,
DATA_TYPE_INT8,
DATA_TYPE_UINT8,
DATA_TYPE_BOOLEAN_ARRAY,
DATA_TYPE_INT8_ARRAY,
DATA_TYPE_UINT8_ARRAY
} data_type_t;
/*
* Fixed-size header of one name/value pair.
* The name string and the value are not members of the struct: as the
* trailing comments indicate, they follow the header in memory. The
* NVP_NAME() and NVP_VALUE() macros in this header compute their addresses
* from the header address, sizeof (nvpair_t) and nvp_name_sz.
*/
typedef struct nvpair {
int32_t nvp_size; /* size of this nvpair */
int16_t nvp_name_sz; /* length of name string */
int16_t nvp_reserve; /* not used */
int32_t nvp_value_elem; /* number of elements for array types */
data_type_t nvp_type; /* type of value */
/* name string */
/* aligned ptr array for string arrays */
/* aligned array of data for value */
} nvpair_t;
/*
* nvlist header
*
* nvl_priv is declared as a uint64_t rather than a pointer although, per its
* comment, it carries a pointer to private data.
* NOTE(review): presumably this keeps the header layout identical for 32-bit
* and 64-bit builds - confirm.
*/
typedef struct nvlist {
int32_t nvl_version;
uint32_t nvl_nvflag; /* persistent flags */
uint64_t nvl_priv; /* ptr to private data if not packed */
uint32_t nvl_flag;
int32_t nvl_pad; /* currently not used, for alignment */
} nvlist_t;
/* nvp implementation version */
#define NV_VERSION 0
/* nvlist pack encoding */
#define NV_ENCODE_NATIVE 0
#define NV_ENCODE_XDR 1
/* nvlist persistent unique name flags, stored in nvl_nvflag */
#define NV_UNIQUE_NAME 0x1
#define NV_UNIQUE_NAME_TYPE 0x2
/* nvlist lookup pairs related flags */
#define NV_FLAG_NOENTOK 0x1
/* convenience macros */
/* round x up to the next multiple of 8; the result has type ulong_t */
#define NV_ALIGN(x) (((ulong_t)(x) + 7ul) & ~7ul)
/* round x up to the next multiple of 4, in the type of x */
#define NV_ALIGN4(x) (((x) + 3) & ~3)
/* accessors for an nvpair_t pointer */
#define NVP_SIZE(nvp) ((nvp)->nvp_size)
/* the name string starts directly after the nvpair_t header */
#define NVP_NAME(nvp) ((char *)(nvp) + sizeof (nvpair_t))
#define NVP_TYPE(nvp) ((nvp)->nvp_type)
#define NVP_NELEM(nvp) ((nvp)->nvp_value_elem)
/* the value starts at the 8-byte-rounded offset past header plus name */
#define NVP_VALUE(nvp) ((char *)(nvp) + NV_ALIGN(sizeof (nvpair_t) \
+ (nvp)->nvp_name_sz))
/* accessors for an nvlist_t pointer */
#define NVL_VERSION(nvl) ((nvl)->nvl_version)
/*
* NOTE(review): nvlist_t as declared in this header has no nvl_size member,
* so any use of NVL_SIZE() would fail to compile - confirm whether this
* macro is dead.
*/
#define NVL_SIZE(nvl) ((nvl)->nvl_size)
#define NVL_FLAG(nvl) ((nvl)->nvl_flag)
/* NV allocator framework */
typedef struct nv_alloc_ops nv_alloc_ops_t;
/*
* An allocator instance: a table of operations plus an untyped argument.
* NOTE(review): nva_arg presumably holds allocator-private state set up by
* nv_ao_init - confirm against the allocator implementations.
*/
typedef struct nv_alloc {
const nv_alloc_ops_t *nva_ops;
void *nva_arg;
} nv_alloc_t;
/*
* Operations an allocator provides. Every callback receives the nv_alloc_t
* it belongs to; nv_ao_init additionally takes a va_list, and nv_ao_free is
* passed the size along with the buffer pointer.
*/
struct nv_alloc_ops {
int (*nv_ao_init)(nv_alloc_t *, __va_list);
void (*nv_ao_fini)(nv_alloc_t *);
void *(*nv_ao_alloc)(nv_alloc_t *, size_t);
void (*nv_ao_free)(nv_alloc_t *, void *, size_t);
void (*nv_ao_reset)(nv_alloc_t *);
};
extern const nv_alloc_ops_t *nv_fixed_ops;
extern nv_alloc_t *nv_alloc_nosleep;
#if defined(_KERNEL) && !defined(_BOOT)
extern nv_alloc_t *nv_alloc_sleep;
#endif
int nv_alloc_init(nv_alloc_t *, const nv_alloc_ops_t *, /* args */ ...);
void nv_alloc_reset(nv_alloc_t *);
void nv_alloc_fini(nv_alloc_t *);
/* list management */
int nvlist_alloc(nvlist_t **, uint_t, int);
void nvlist_free(nvlist_t *);
int nvlist_size(nvlist_t *, size_t *, int);
int nvlist_pack(nvlist_t *, char **, size_t *, int, int);
int nvlist_unpack(char *, size_t, nvlist_t **, int);
int nvlist_dup(nvlist_t *, nvlist_t **, int);
int nvlist_merge(nvlist_t *, nvlist_t *, int);
int nvlist_xalloc(nvlist_t **, uint_t, nv_alloc_t *);
int nvlist_xpack(nvlist_t *, char **, size_t *, int, nv_alloc_t *);
int nvlist_xunpack(char *, size_t, nvlist_t **, nv_alloc_t *);
int nvlist_xdup(nvlist_t *, nvlist_t **, nv_alloc_t *);
nv_alloc_t *nvlist_lookup_nv_alloc(nvlist_t *);
int nvlist_add_nvpair(nvlist_t *, nvpair_t *);
int nvlist_add_boolean(nvlist_t *, const char *);
int nvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);
int nvlist_add_byte(nvlist_t *, const char *, uchar_t);
int nvlist_add_int8(nvlist_t *, const char *, int8_t);
int nvlist_add_uint8(nvlist_t *, const char *, uint8_t);
int nvlist_add_int16(nvlist_t *, const char *, int16_t);
int nvlist_add_uint16(nvlist_t *, const char *, uint16_t);
int nvlist_add_int32(nvlist_t *, const char *, int32_t);
int nvlist_add_uint32(nvlist_t *, const char *, uint32_t);
int nvlist_add_int64(nvlist_t *, const char *, int64_t);
int nvlist_add_uint64(nvlist_t *, const char *, uint64_t);
int nvlist_add_string(nvlist_t *, const char *, const char *);
int nvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *);
int nvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t);
int nvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t);
int nvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t);
int nvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t);
int nvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t);
int nvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t);
int nvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t);
int nvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint_t);
int nvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t);
int nvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t);
int nvlist_add_string_array(nvlist_t *, const char *, char *const *, uint_t);
int nvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t);
int nvlist_add_hrtime(nvlist_t *, const char *, hrtime_t);
int nvlist_remove(nvlist_t *, const char *, data_type_t);
int nvlist_remove_all(nvlist_t *, const char *);
int nvlist_lookup_boolean(nvlist_t *, const char *);
int nvlist_lookup_boolean_value(nvlist_t *, const char *, boolean_t *);
int nvlist_lookup_byte(nvlist_t *, const char *, uchar_t *);
int nvlist_lookup_int8(nvlist_t *, const char *, int8_t *);
int nvlist_lookup_uint8(nvlist_t *, const char *, uint8_t *);
int nvlist_lookup_int16(nvlist_t *, const char *, int16_t *);
int nvlist_lookup_uint16(nvlist_t *, const char *, uint16_t *);
int nvlist_lookup_int32(nvlist_t *, const char *, int32_t *);
int nvlist_lookup_uint32(nvlist_t *, const char *, uint32_t *);
int nvlist_lookup_int64(nvlist_t *, const char *, int64_t *);
int nvlist_lookup_uint64(nvlist_t *, const char *, uint64_t *);
int nvlist_lookup_string(nvlist_t *, const char *, char **);
int nvlist_lookup_nvlist(nvlist_t *, const char *, nvlist_t **);
int nvlist_lookup_boolean_array(nvlist_t *, const char *,
boolean_t **, uint_t *);
int nvlist_lookup_byte_array(nvlist_t *, const char *, uchar_t **, uint_t *);
int nvlist_lookup_int8_array(nvlist_t *, const char *, int8_t **, uint_t *);
int nvlist_lookup_uint8_array(nvlist_t *, const char *, uint8_t **, uint_t *);
int nvlist_lookup_int16_array(nvlist_t *, const char *, int16_t **, uint_t *);
int nvlist_lookup_uint16_array(nvlist_t *, const char *, uint16_t **, uint_t *);
int nvlist_lookup_int32_array(nvlist_t *, const char *, int32_t **, uint_t *);
int nvlist_lookup_uint32_array(nvlist_t *, const char *, uint32_t **, uint_t *);
int nvlist_lookup_int64_array(nvlist_t *, const char *, int64_t **, uint_t *);
int nvlist_lookup_uint64_array(nvlist_t *, const char *, uint64_t **, uint_t *);
int nvlist_lookup_string_array(nvlist_t *, const char *, char ***, uint_t *);
int nvlist_lookup_nvlist_array(nvlist_t *, const char *,
nvlist_t ***, uint_t *);
int nvlist_lookup_hrtime(nvlist_t *, const char *, hrtime_t *);
int nvlist_lookup_pairs(nvlist_t *nvl, int, ...);
int nvlist_lookup_nvpair(nvlist_t *nvl, const char *, nvpair_t **);
boolean_t nvlist_exists(nvlist_t *nvl, const char *);
/* processing nvpair */
nvpair_t *nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *);
char *nvpair_name(nvpair_t *);
data_type_t nvpair_type(nvpair_t *);
int nvpair_value_boolean_value(nvpair_t *, boolean_t *);
int nvpair_value_byte(nvpair_t *, uchar_t *);
int nvpair_value_int8(nvpair_t *, int8_t *);
int nvpair_value_uint8(nvpair_t *, uint8_t *);
int nvpair_value_int16(nvpair_t *, int16_t *);
int nvpair_value_uint16(nvpair_t *, uint16_t *);
int nvpair_value_int32(nvpair_t *, int32_t *);
int nvpair_value_uint32(nvpair_t *, uint32_t *);
int nvpair_value_int64(nvpair_t *, int64_t *);
int nvpair_value_uint64(nvpair_t *, uint64_t *);
int nvpair_value_string(nvpair_t *, char **);
int nvpair_value_nvlist(nvpair_t *, nvlist_t **);
int nvpair_value_boolean_array(nvpair_t *, boolean_t **, uint_t *);
int nvpair_value_byte_array(nvpair_t *, uchar_t **, uint_t *);
int nvpair_value_int8_array(nvpair_t *, int8_t **, uint_t *);
int nvpair_value_uint8_array(nvpair_t *, uint8_t **, uint_t *);
int nvpair_value_int16_array(nvpair_t *, int16_t **, uint_t *);
int nvpair_value_uint16_array(nvpair_t *, uint16_t **, uint_t *);
int nvpair_value_int32_array(nvpair_t *, int32_t **, uint_t *);
int nvpair_value_uint32_array(nvpair_t *, uint32_t **, uint_t *);
int nvpair_value_int64_array(nvpair_t *, int64_t **, uint_t *);
int nvpair_value_uint64_array(nvpair_t *, uint64_t **, uint_t *);
int nvpair_value_string_array(nvpair_t *, char ***, uint_t *);
int nvpair_value_nvlist_array(nvpair_t *, nvlist_t ***, uint_t *);
int nvpair_value_hrtime(nvpair_t *, hrtime_t *);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_NVPAIR_H */
@@ -0,0 +1,73 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _NVPAIR_IMPL_H
#define _NVPAIR_IMPL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/nvpair.h>
/*
* The structures here are provided for information and debugging purposes
* only and may be changed in the future.
*/
/*
* implementation linked list for pre-packed data
*
* Each node is a pair of next/prev links followed by the nvpair header.
* The links share a union with a uint64_t member which, per its comment,
* exists to ensure alignment. The nvi_next and nvi_prev macros below give
* short names for the link fields inside the union.
*/
typedef struct i_nvp i_nvp_t;
struct i_nvp {
union {
uint64_t _nvi_align; /* ensure alignment */
struct {
i_nvp_t *_nvi_next; /* pointer to next nvpair */
i_nvp_t *_nvi_prev; /* pointer to prev nvpair */
} _nvi;
} _nvi_un;
nvpair_t nvi_nvp; /* nvpair */
};
#define nvi_next _nvi_un._nvi._nvi_next
#define nvi_prev _nvi_un._nvi._nvi_prev
/*
* Private bookkeeping for an nvlist: the i_nvp_t list (first and last node),
* a walker cursor, the allocator in use and an internal state word.
* NOTE(review): presumably this is what nvlist_t.nvl_priv points to -
* confirm in nvpair.c.
*/
typedef struct {
i_nvp_t *nvp_list; /* linked list of nvpairs */
i_nvp_t *nvp_last; /* last nvpair */
i_nvp_t *nvp_curr; /* current walker nvpair */
nv_alloc_t *nvp_nva; /* pluggable allocator */
uint32_t nvp_stat; /* internal state */
} nvpriv_t;
#ifdef __cplusplus
}
#endif
#endif /* _NVPAIR_IMPL_H */
+266
View File
@@ -0,0 +1,266 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <unistd.h>
#include <strings.h>
#include "libnvpair.h"
/*
* libnvpair - A tools library for manipulating <name, value> pairs.
*
* This library provides routines for packing and unpacking nv pairs
* for transporting data across process boundaries, transporting
* between kernel and userland, and possibly saving onto disk files.
*/
/*
 * Write 'depth' tab characters to fp.  A depth of zero or less writes
 * nothing.
 */
static void
indent(FILE *fp, int depth)
{
	int level;

	for (level = 0; level < depth; level++)
		(void) fputc('\t', fp);
}
/*
 * Print every pair of an nvlist to fp, one "name = value..." line per
 * pair, with each line prefixed by 'depth' tabs (plus one more tab before
 * the name).
 *
 * fp:    output stream.
 * nvl:   list to print; NULL prints nothing.
 * depth: nesting level; embedded nvlists and nvlist arrays are printed by
 *        recursing with depth + 1.
 *
 * Unsigned and byte values are printed in hex, signed values in decimal.
 * The return values of the nvpair_value_*() getters are deliberately
 * ignored: each getter is only called for the type reported by
 * nvpair_type().
 */
static
void
nvlist_print_with_indent(FILE *fp, nvlist_t *nvl, int depth)
{
	uint_t i;	/* unsigned to match nelem, which it is compared to */
	char *name;
	uint_t nelem;
	nvpair_t *nvp;

	if (nvl == NULL)
		return;

	indent(fp, depth);
	(void) fprintf(fp, "nvlist version: %d\n", NVL_VERSION(nvl));

	nvp = nvlist_next_nvpair(nvl, NULL);

	while (nvp) {
		data_type_t type = nvpair_type(nvp);

		indent(fp, depth);
		name = nvpair_name(nvp);
		(void) fprintf(fp, "\t%s =", name);
		nelem = 0;
		switch (type) {
		case DATA_TYPE_BOOLEAN: {
			/* a valueless boolean is true merely by being present */
			(void) fprintf(fp, " 1");
			break;
		}
		case DATA_TYPE_BOOLEAN_VALUE: {
			boolean_t val;
			(void) nvpair_value_boolean_value(nvp, &val);
			(void) fprintf(fp, " %d", val);
			break;
		}
		case DATA_TYPE_BYTE: {
			uchar_t val;
			(void) nvpair_value_byte(nvp, &val);
			(void) fprintf(fp, " 0x%2.2x", val);
			break;
		}
		case DATA_TYPE_INT8: {
			int8_t val;
			(void) nvpair_value_int8(nvp, &val);
			(void) fprintf(fp, " %d", val);
			break;
		}
		case DATA_TYPE_UINT8: {
			uint8_t val;
			(void) nvpair_value_uint8(nvp, &val);
			(void) fprintf(fp, " 0x%x", val);
			break;
		}
		case DATA_TYPE_INT16: {
			int16_t val;
			(void) nvpair_value_int16(nvp, &val);
			(void) fprintf(fp, " %d", val);
			break;
		}
		case DATA_TYPE_UINT16: {
			uint16_t val;
			(void) nvpair_value_uint16(nvp, &val);
			(void) fprintf(fp, " 0x%x", val);
			break;
		}
		case DATA_TYPE_INT32: {
			int32_t val;
			(void) nvpair_value_int32(nvp, &val);
			(void) fprintf(fp, " %d", val);
			break;
		}
		case DATA_TYPE_UINT32: {
			uint32_t val;
			(void) nvpair_value_uint32(nvp, &val);
			(void) fprintf(fp, " 0x%x", val);
			break;
		}
		case DATA_TYPE_INT64: {
			int64_t val;
			(void) nvpair_value_int64(nvp, &val);
			(void) fprintf(fp, " %lld", (longlong_t)val);
			break;
		}
		case DATA_TYPE_UINT64: {
			uint64_t val;
			(void) nvpair_value_uint64(nvp, &val);
			(void) fprintf(fp, " 0x%llx", (u_longlong_t)val);
			break;
		}
		case DATA_TYPE_STRING: {
			char *val;
			(void) nvpair_value_string(nvp, &val);
			(void) fprintf(fp, " %s", val);
			break;
		}
		case DATA_TYPE_BOOLEAN_ARRAY: {
			boolean_t *val;
			(void) nvpair_value_boolean_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				(void) fprintf(fp, " %d", val[i]);
			break;
		}
		case DATA_TYPE_BYTE_ARRAY: {
			uchar_t *val;
			(void) nvpair_value_byte_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				(void) fprintf(fp, " 0x%2.2x", val[i]);
			break;
		}
		case DATA_TYPE_INT8_ARRAY: {
			int8_t *val;
			(void) nvpair_value_int8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				(void) fprintf(fp, " %d", val[i]);
			break;
		}
		case DATA_TYPE_UINT8_ARRAY: {
			uint8_t *val;
			(void) nvpair_value_uint8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				(void) fprintf(fp, " 0x%x", val[i]);
			break;
		}
		case DATA_TYPE_INT16_ARRAY: {
			int16_t *val;
			(void) nvpair_value_int16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				(void) fprintf(fp, " %d", val[i]);
			break;
		}
		case DATA_TYPE_UINT16_ARRAY: {
			uint16_t *val;
			(void) nvpair_value_uint16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				(void) fprintf(fp, " 0x%x", val[i]);
			break;
		}
		case DATA_TYPE_INT32_ARRAY: {
			int32_t *val;
			(void) nvpair_value_int32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				(void) fprintf(fp, " %d", val[i]);
			break;
		}
		case DATA_TYPE_UINT32_ARRAY: {
			uint32_t *val;
			(void) nvpair_value_uint32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				(void) fprintf(fp, " 0x%x", val[i]);
			break;
		}
		case DATA_TYPE_INT64_ARRAY: {
			int64_t *val;
			(void) nvpair_value_int64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				(void) fprintf(fp, " %lld", (longlong_t)val[i]);
			break;
		}
		case DATA_TYPE_UINT64_ARRAY: {
			uint64_t *val;
			(void) nvpair_value_uint64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				(void) fprintf(fp, " 0x%llx",
				    (u_longlong_t)val[i]);
			break;
		}
		case DATA_TYPE_STRING_ARRAY: {
			char **val;
			(void) nvpair_value_string_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				(void) fprintf(fp, " %s", val[i]);
			break;
		}
		case DATA_TYPE_HRTIME: {
			hrtime_t val;
			(void) nvpair_value_hrtime(nvp, &val);
			/*
			 * Cast like the other 64-bit cases so the argument
			 * matches %llx whatever hrtime_t is defined as.
			 */
			(void) fprintf(fp, " 0x%llx", (u_longlong_t)val);
			break;
		}
		case DATA_TYPE_NVLIST: {
			nvlist_t *val;
			(void) nvpair_value_nvlist(nvp, &val);
			(void) fprintf(fp, " (embedded nvlist)\n");
			nvlist_print_with_indent(fp, val, depth + 1);
			indent(fp, depth + 1);
			(void) fprintf(fp, "(end %s)\n", name);
			break;
		}
		case DATA_TYPE_NVLIST_ARRAY: {
			nvlist_t **val;
			(void) nvpair_value_nvlist_array(nvp, &val, &nelem);
			(void) fprintf(fp, " (array of embedded nvlists)\n");
			for (i = 0; i < nelem; i++) {
				indent(fp, depth + 1);
				(void) fprintf(fp,
				    "(start %s[%u])\n", name, i);
				nvlist_print_with_indent(fp, val[i], depth + 1);
				indent(fp, depth + 1);
				(void) fprintf(fp, "(end %s[%u])\n", name, i);
			}
			break;
		}
		default:
			(void) fprintf(fp, " unknown data type (%d)", type);
			break;
		}
		(void) fprintf(fp, "\n");
		nvp = nvlist_next_nvpair(nvl, nvp);
	}
}
/*
 * Print the contents of nvl to the stream fp.  All of the formatting is
 * delegated to nvlist_print_with_indent(), started at indent depth 0.
 */
void
nvlist_print(FILE *fp, nvlist_t *nvl)
{
nvlist_print_with_indent(fp, nvl, 0);
}
File diff suppressed because it is too large Load Diff
+120
View File
@@ -0,0 +1,120 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/stropts.h>
#include <sys/isa_defs.h>
#include <sys/nvpair.h>
#include <sys/sysmacros.h>
#if defined(_KERNEL) && !defined(_BOOT)
#include <sys/varargs.h>
#else
#include <stdarg.h>
#include <strings.h>
#endif
/*
* This allocator is very simple.
* - it uses a pre-allocated buffer for memory allocations.
* - it does _not_ free memory in the pre-allocated buffer.
*
* The reason for the selected implementation is simplicity.
* This allocator is designed for the usage in interrupt context when
* the caller may not wait for free memory.
*/
/*
 * Bookkeeping header for the pre-allocated buffer allocator.
 * nv_fixed_init() lays it over the first sizeof (uintptr_t)-aligned address
 * of the caller's buffer; nv_fixed_alloc() hands out the space behind it.
 */
typedef struct nvbuf {
uintptr_t nvb_buf; /* address of this header (aligned start of buffer) */
uintptr_t nvb_lim; /* limit address: buffer address + buffer size */
uintptr_t nvb_cur; /* address the next allocation is carved from */
} nvbuf_t;
/*
 * nv_ao_init() entry point for the pre-allocated buffer allocator.
 *
 * Two arguments are pulled from valist, in this order:
 *
 *	uintptr_t	address of the pre-allocated buffer
 *	size_t		size of the pre-allocated buffer
 *
 * An nvbuf_t header is laid over the first sizeof (uintptr_t)-aligned
 * address of the buffer and remembered in nva->nva_arg; the remaining
 * nvbuf_t members are derived from the two arguments.
 *
 * Returns 0 on success, or EINVAL when the buffer address is 0 or the
 * buffer is too small to hold the header.
 */
static int
nv_fixed_init(nv_alloc_t *nva, va_list valist)
{
	uintptr_t start, end;
	size_t bufsz;
	nvbuf_t *hdr;

	/* Address first, size second: the fetch order must not change. */
	start = va_arg(valist, uintptr_t);
	bufsz = va_arg(valist, size_t);
	end = start + bufsz;

	if (start == 0)
		return (EINVAL);

	hdr = (nvbuf_t *)P2ROUNDUP(start, sizeof (uintptr_t));
	if ((uintptr_t)(hdr + 1) > end)
		return (EINVAL);

	hdr->nvb_buf = (uintptr_t)hdr;
	hdr->nvb_cur = (uintptr_t)(hdr + 1);
	hdr->nvb_lim = end;
	nva->nva_arg = hdr;

	return (0);
}
/*
 * nv_ao_alloc() entry point: carve size bytes out of the pre-allocated
 * buffer.
 *
 * Returns the address of the new region, or NULL when size is 0 or the
 * request does not fit in the space that is left.  Nothing is ever handed
 * back to the buffer by nv_fixed_free(); nv_fixed_reset() rewinds the
 * allocator as a whole.
 */
static void *
nv_fixed_alloc(nv_alloc_t *nva, size_t size)
{
	nvbuf_t *nvb = nva->nva_arg;
	uintptr_t new = nvb->nvb_cur;

	/*
	 * Compare size against the room that is left instead of adding it
	 * to the current address: for a very large size the sum would wrap
	 * around, land below the limit and let an undersized region be
	 * handed out.  nvb_cur can sit above nvb_lim after the round-up
	 * below when the limit is not pointer-aligned, so that case is
	 * tested first to keep the subtraction from underflowing.
	 */
	if (size == 0 || new > nvb->nvb_lim || size > nvb->nvb_lim - new)
		return (NULL);

	/* Keep the next allocation aligned to sizeof (uintptr_t). */
	nvb->nvb_cur = P2ROUNDUP(new + size, sizeof (uintptr_t));

	return ((void *)new);
}
/*
 * nv_ao_free() entry point for the pre-allocated buffer allocator.
 * Deliberately a no-op: all three arguments are ignored and nothing is
 * handed back to the buffer.
 */
/*ARGSUSED*/
static void
nv_fixed_free(nv_alloc_t *nva, void *buf, size_t size)
{
/* don't free memory in the pre-allocated buffer */
}
/*
 * nv_ao_reset() entry point: rewind the allocator so that the next
 * allocation starts directly behind the nvbuf_t header again.
 */
static void
nv_fixed_reset(nv_alloc_t *nva)
{
	nvbuf_t *hdr = nva->nva_arg;

	hdr->nvb_cur = (uintptr_t)(hdr + 1);
}
/*
 * Operations vector for the pre-allocated buffer allocator.  No fini
 * routine is supplied (NULL).
 */
const nv_alloc_ops_t nv_fixed_ops_def = {
nv_fixed_init, /* nv_ao_init() */
NULL, /* nv_ao_fini() */
nv_fixed_alloc, /* nv_ao_alloc() */
nv_fixed_free, /* nv_ao_free() */
nv_fixed_reset /* nv_ao_reset() */
};
/* Pointer to the operations vector above. */
const nv_alloc_ops_t *nv_fixed_ops = &nv_fixed_ops_def;
+59
View File
@@ -0,0 +1,59 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/nvpair.h>
#include <stdlib.h>
/*
 * nv_ao_alloc() entry point of the system allocator: returns size bytes
 * obtained from malloc(), i.e. NULL when malloc() fails.  nva is unused.
 */
/*ARGSUSED*/
static void *
nv_alloc_sys(nv_alloc_t *nva, size_t size)
{
return (malloc(size));
}
/*
 * nv_ao_free() entry point of the system allocator: releases buf with
 * free().  nva and size are unused.
 */
/*ARGSUSED*/
static void
nv_free_sys(nv_alloc_t *nva, void *buf, size_t size)
{
free(buf);
}
/*
 * Operations vector backed by malloc()/free().  No init, fini or reset
 * routine is supplied (NULL).
 */
const nv_alloc_ops_t system_ops_def = {
NULL, /* nv_ao_init() */
NULL, /* nv_ao_fini() */
nv_alloc_sys, /* nv_ao_alloc() */
nv_free_sys, /* nv_ao_free() */
NULL /* nv_ao_reset() */
};
/*
 * Allocator instance that uses the malloc()/free() operations above.
 * NOTE(review): the NULL second member is presumably nva_arg -- confirm
 * against the nv_alloc_t declaration.
 */
nv_alloc_t nv_alloc_nosleep_def = {
&system_ops_def,
NULL
};
/* Pointer to the allocator instance above. */
nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def;
+37
View File
@@ -0,0 +1,37 @@
subdir-m += include
DISTFILES = port.c strlcat.c strlcpy.c strnlen.c u8_textprep.c
MODULE := zport
LIBRARY := libzport
# Compile as kernel module. The symlinks needed for all k* objects
# are created by the top level configure script.
EXTRA_CFLAGS = @KERNELCPPFLAGS@
EXTRA_CFLAGS += -I@LIBDIR@/libzcommon/include
EXTRA_CFLAGS += -I@LIBDIR@/libport/include
obj-m := ${MODULE}.o
${MODULE}-objs += spl.o
${MODULE}-objs += ku8_textprep.o
# Compile as shared library. There's an extra useless host program
# here called 'zu' because it was the easiest way I could convince
# the kernel build system to construct a user space shared library.
HOSTCFLAGS += @HOSTCFLAGS@
HOSTCFLAGS += -I@LIBDIR@/libzcommon/include
HOSTCFLAGS += -I@LIBDIR@/libport/include
hostprogs-y := zu
always := $(hostprogs-y)
zu-objs := zu.o ${LIBRARY}.so
${LIBRARY}-objs += strlcpy.o
${LIBRARY}-objs += strlcat.o
${LIBRARY}-objs += strnlen.o
${LIBRARY}-objs += port.o
${LIBRARY}-objs += u8_textprep.o
+4
View File
@@ -0,0 +1,4 @@
subdir-m += sys
DISTFILES = fake_ioctl.h libdiskmgt.h libshare.h mntent.h stdlib.h
DISTFILES += string.h strings.h stropts.h unistd.h
+41
View File
@@ -0,0 +1,41 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _PORT_FAKE_IOCTL_H
#define _PORT_FAKE_IOCTL_H
/*
 * Thin wrapper around the ioctl() that is in scope at this point.  It is
 * defined before the WANT_FAKE_IOCTL block below turns ioctl into a macro
 * for dctlc_ioctl(), so the call in its body is not redirected.
 */
static inline int real_ioctl(int fd, int request, void *arg)
{
return ioctl(fd, request, arg);
}
#ifdef WANT_FAKE_IOCTL
#include <sys/dmu_ctl.h>
#define ioctl(fd,req,arg) dctlc_ioctl(fd,req,arg)
#endif
#endif /* _PORT_FAKE_IOCTL_H */
+278
View File
@@ -0,0 +1,278 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include "zfs_config.h"
#ifdef HAVE_LIBDISKMGT_H
#include_next <libdiskmgt.h>
#else
#ifndef _LIBDISKMGT_H
#define _LIBDISKMGT_H
#ifdef __cplusplus
extern "C" {
#endif
#include <libnvpair.h>
#include <sys/swap.h>
/*
* Holds all the data regarding the device.
* Private to libdiskmgt. Must use dm_xxx functions to set/get data.
*/
typedef uint64_t dm_descriptor_t;
/*
 * Type of the `who` argument of dm_inuse().
 * NOTE(review): the names suggest each value identifies the tool asking
 * whether a device is in use (mkfs, zpool, format, ...) -- confirm.
 */
typedef enum {
DM_WHO_MKFS = 0,
DM_WHO_ZPOOL,
DM_WHO_ZPOOL_FORCE,
DM_WHO_FORMAT,
DM_WHO_SWAP,
DM_WHO_DUMP,
DM_WHO_ZPOOL_SPARE
} dm_who_type_t;
/*
 * Kind of object a descriptor refers to.  Taken by dm_get_descriptors(),
 * dm_get_associated_descriptors(), dm_get_associated_types() and
 * dm_get_descriptor_by_name(); returned by dm_get_type().
 */
typedef enum {
DM_DRIVE = 0,
DM_CONTROLLER,
DM_MEDIA,
DM_SLICE,
DM_PARTITION,
DM_PATH,
DM_ALIAS,
DM_BUS
} dm_desc_type_t;
typedef enum {
DM_DT_UNKNOWN = 0,
DM_DT_FIXED,
DM_DT_ZIP,
DM_DT_JAZ,
DM_DT_FLOPPY,
DM_DT_MO_ERASABLE,
DM_DT_MO_WRITEONCE,
DM_DT_AS_MO,
DM_DT_CDROM,
DM_DT_CDR,
DM_DT_CDRW,
DM_DT_DVDROM,
DM_DT_DVDR,
DM_DT_DVDRAM,
DM_DT_DVDRW,
DM_DT_DDCDROM,
DM_DT_DDCDR,
DM_DT_DDCDRW
} dm_drive_type_t;
typedef enum {
DM_MT_UNKNOWN = 0,
DM_MT_FIXED,
DM_MT_FLOPPY,
DM_MT_CDROM,
DM_MT_ZIP,
DM_MT_JAZ,
DM_MT_CDR,
DM_MT_CDRW,
DM_MT_DVDROM,
DM_MT_DVDR,
DM_MT_DVDRAM,
DM_MT_MO_ERASABLE,
DM_MT_MO_WRITEONCE,
DM_MT_AS_MO
} dm_media_type_t;
#define DM_FILTER_END -1
/* drive stat name */
typedef enum {
DM_DRV_STAT_PERFORMANCE = 0,
DM_DRV_STAT_DIAGNOSTIC,
DM_DRV_STAT_TEMPERATURE
} dm_drive_stat_t;
/* slice stat name */
typedef enum {
DM_SLICE_STAT_USE = 0
} dm_slice_stat_t;
/* attribute definitions */
/* drive */
#define DM_DISK_UP 1
#define DM_DISK_DOWN 0
#define DM_CLUSTERED "clustered"
#define DM_DRVTYPE "drvtype"
#define DM_FAILING "failing"
#define DM_LOADED "loaded" /* also in media */
#define DM_NDNRERRS "ndevice_not_ready_errors"
#define DM_NBYTESREAD "nbytes_read"
#define DM_NBYTESWRITTEN "nbytes_written"
#define DM_NHARDERRS "nhard_errors"
#define DM_NILLREQERRS "nillegal_req_errors"
#define DM_NMEDIAERRS "nmedia_errors"
#define DM_NNODEVERRS "nno_dev_errors"
#define DM_NREADOPS "nread_ops"
#define DM_NRECOVERRS "nrecoverable_errors"
#define DM_NSOFTERRS "nsoft_errors"
#define DM_NTRANSERRS "ntransport_errors"
#define DM_NWRITEOPS "nwrite_ops"
#define DM_OPATH "opath"
#define DM_PRODUCT_ID "product_id"
#define DM_REMOVABLE "removable" /* also in media */
#define DM_RPM "rpm"
#define DM_STATUS "status"
#define DM_SYNC_SPEED "sync_speed"
#define DM_TEMPERATURE "temperature"
#define DM_VENDOR_ID "vendor_id"
#define DM_WIDE "wide" /* also on controller */
#define DM_WWN "wwn"
/* bus */
#define DM_BTYPE "btype"
#define DM_CLOCK "clock" /* also on controller */
#define DM_PNAME "pname"
/* controller */
#define DM_FAST "fast"
#define DM_FAST20 "fast20"
#define DM_FAST40 "fast40"
#define DM_FAST80 "fast80"
#define DM_MULTIPLEX "multiplex"
#define DM_PATH_STATE "path_state"
#define DM_CTYPE_ATA "ata"
#define DM_CTYPE_SCSI "scsi"
#define DM_CTYPE_FIBRE "fibre channel"
#define DM_CTYPE_USB "usb"
#define DM_CTYPE_UNKNOWN "unknown"
/* media */
#define DM_BLOCKSIZE "blocksize"
#define DM_FDISK "fdisk"
#define DM_MTYPE "mtype"
#define DM_NACTUALCYLINDERS "nactual_cylinders"
#define DM_NALTCYLINDERS "nalt_cylinders"
#define DM_NCYLINDERS "ncylinders"
#define DM_NHEADS "nheads"
#define DM_NPHYSCYLINDERS "nphys_cylinders"
#define DM_NSECTORS "nsectors" /* also in partition */
#define DM_SIZE "size" /* also in slice */
#define DM_NACCESSIBLE "naccessible"
#define DM_LABEL "label"
/* partition */
#define DM_BCYL "bcyl"
#define DM_BHEAD "bhead"
#define DM_BOOTID "bootid"
#define DM_BSECT "bsect"
#define DM_ECYL "ecyl"
#define DM_EHEAD "ehead"
#define DM_ESECT "esect"
#define DM_PTYPE "ptype"
#define DM_RELSECT "relsect"
/* slice */
#define DM_DEVICEID "deviceid"
#define DM_DEVT "devt"
#define DM_INDEX "index"
#define DM_EFI_NAME "name"
#define DM_MOUNTPOINT "mountpoint"
#define DM_LOCALNAME "localname"
#define DM_START "start"
#define DM_TAG "tag"
#define DM_FLAG "flag"
#define DM_EFI "efi" /* also on media */
#define DM_USED_BY "used_by"
#define DM_USED_NAME "used_name"
#define DM_USE_MOUNT "mount"
#define DM_USE_SVM "svm"
#define DM_USE_LU "lu"
#define DM_USE_DUMP "dump"
#define DM_USE_VXVM "vxvm"
#define DM_USE_FS "fs"
#define DM_USE_VFSTAB "vfstab"
#define DM_USE_EXPORTED_ZPOOL "exported_zpool"
#define DM_USE_ACTIVE_ZPOOL "active_zpool"
#define DM_USE_SPARE_ZPOOL "spare_zpool"
#define DM_USE_L2CACHE_ZPOOL "l2cache_zpool"
/* event */
#define DM_EV_NAME "name"
#define DM_EV_DTYPE "edtype"
#define DM_EV_TYPE "evtype"
#define DM_EV_TADD "add"
#define DM_EV_TREMOVE "remove"
#define DM_EV_TCHANGE "change"
/* findisks */
#define DM_CTYPE "ctype"
#define DM_LUN "lun"
#define DM_TARGET "target"
/*
 * True when the NOINUSE_CHECK environment variable is set (to any value).
 * The expansion is parenthesized so that it stays a single operand when the
 * macro is combined with other operators.
 */
#define NOINUSE_SET (getenv("NOINUSE_CHECK") != NULL)
void dm_free_descriptors(dm_descriptor_t *desc_list);
void dm_free_descriptor(dm_descriptor_t desc);
void dm_free_name(char *name);
void dm_free_swapentries(swaptbl_t *);
dm_descriptor_t *dm_get_descriptors(dm_desc_type_t type, int filter[],
int *errp);
dm_descriptor_t *dm_get_associated_descriptors(dm_descriptor_t desc,
dm_desc_type_t type, int *errp);
dm_desc_type_t *dm_get_associated_types(dm_desc_type_t type);
dm_descriptor_t dm_get_descriptor_by_name(dm_desc_type_t desc_type,
char *name, int *errp);
char *dm_get_name(dm_descriptor_t desc, int *errp);
dm_desc_type_t dm_get_type(dm_descriptor_t desc);
nvlist_t *dm_get_attributes(dm_descriptor_t desc, int *errp);
nvlist_t *dm_get_stats(dm_descriptor_t desc, int stat_type,
int *errp);
void dm_init_event_queue(void(*callback)(nvlist_t *, int),
int *errp);
nvlist_t *dm_get_event(int *errp);
void dm_get_slices(char *drive, dm_descriptor_t **slices,
int *errp);
void dm_get_slice_stats(char *slice, nvlist_t **dev_stats,
int *errp);
int dm_get_swapentries(swaptbl_t **, int *);
void dm_get_usage_string(char *who, char *data, char **msg);
int dm_inuse(char *dev_name, char **msg, dm_who_type_t who,
int *errp);
int dm_inuse_swap(const char *dev_name, int *errp);
int dm_isoverlapping(char *dev_name, char **msg, int *errp);
#ifdef __cplusplus
}
#endif
#endif /* _LIBDISKMGT_H */
#endif /* HAVE_LIBDISKMGT_H */
+287
View File
@@ -0,0 +1,287 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* basic API declarations for share management
*/
#include "zfs_config.h"
#ifdef HAVE_LIBSHARE
#include_next <libshare.h>
#else
#ifndef _LIBSHARE_H
#define _LIBSHARE_H
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/types.h>
/*
* Basic datatypes for most functions
*/
typedef void *sa_group_t;
typedef void *sa_share_t;
typedef void *sa_property_t;
typedef void *sa_optionset_t;
typedef void *sa_security_t;
typedef void *sa_protocol_properties_t;
typedef void *sa_resource_t;
typedef void *sa_handle_t; /* opaque handle to access core functions */
/*
* defined error values
*/
#define SA_OK 0
#define SA_NO_SUCH_PATH 1 /* provided path doesn't exist */
#define SA_NO_MEMORY 2 /* no memory for data structures */
#define SA_DUPLICATE_NAME 3 /* object name is already in use */
#define SA_BAD_PATH 4 /* not a full path */
#define SA_NO_SUCH_GROUP 5 /* group is not defined */
#define SA_CONFIG_ERR 6 /* system configuration error */
#define SA_SYSTEM_ERR 7 /* system error, use errno */
#define SA_SYNTAX_ERR 8 /* syntax error on command line */
#define SA_NO_PERMISSION 9 /* no permission for operation */
#define SA_BUSY 10 /* resource is busy */
#define SA_NO_SUCH_PROP 11 /* property doesn't exist */
#define SA_INVALID_NAME 12 /* name of object is invalid */
#define SA_INVALID_PROTOCOL 13 /* specified protocol not valid */
#define SA_NOT_ALLOWED 14 /* operation not allowed */
#define SA_BAD_VALUE 15 /* bad value for property */
#define SA_INVALID_SECURITY 16 /* invalid security type */
#define SA_NO_SUCH_SECURITY 17 /* security set not found */
#define SA_VALUE_CONFLICT 18 /* property value conflict */
#define SA_NOT_IMPLEMENTED 19 /* plugin interface not implemented */
#define SA_INVALID_PATH 20 /* path is sub-dir of existing share */
#define SA_NOT_SUPPORTED 21 /* operation not supported for proto */
#define SA_PROP_SHARE_ONLY 22 /* property valid on share only */
#define SA_NOT_SHARED 23 /* path is not shared */
#define SA_NO_SUCH_RESOURCE 24 /* resource not found */
#define SA_RESOURCE_REQUIRED 25 /* resource name is required */
#define SA_MULTIPLE_ERROR 26 /* multiple protocols reported error */
#define SA_PATH_IS_SUBDIR 27 /* check_path found path is subdir */
#define SA_PATH_IS_PARENTDIR 28 /* check_path found path is parent */
#define SA_NO_SECTION 29 /* protocol requires section info */
#define SA_NO_SUCH_SECTION 30 /* no section found */
#define SA_NO_PROPERTIES 31 /* no properties found */
#define SA_PASSWORD_ENC 32 /* passwords must be encrypted */
/* API Initialization */
#define SA_INIT_SHARE_API 0x0001 /* init share specific interface */
#define SA_INIT_CONTROL_API 0x0002 /* init control specific interface */
/* not part of API returns */
/*
 * NOTE(review): 32 is also the value of SA_PASSWORD_ENC above, so the two
 * codes cannot be told apart by value -- confirm that this is intended.
 */
#define SA_LEGACY_ERR 32 /* share/unshare error return */
/*
* other defined values
*/
#define SA_MAX_NAME_LEN 100 /* must fit service instance name */
#define SA_MAX_RESOURCE_NAME 255 /* Maximum length of resource name */
/* Used in calls to sa_add_share() and sa_add_resource() */
#define SA_SHARE_TRANSIENT 0 /* shared but not across reboot */
#define SA_SHARE_LEGACY 1 /* share is in dfstab only */
#define SA_SHARE_PERMANENT 2 /* share goes to repository */
/* sa_check_path() related */
#define SA_CHECK_NORMAL 0 /* only check against active shares */
#define SA_CHECK_STRICT 1 /* check against all shares */
/* RBAC related */
#define SA_RBAC_MANAGE "solaris.smf.manage.shares"
#define SA_RBAC_VALUE "solaris.smf.value.shares"
/*
* Feature set bit definitions
*/
#define SA_FEATURE_NONE 0x0000 /* no feature flags set */
#define SA_FEATURE_RESOURCE 0x0001 /* resource names are required */
#define SA_FEATURE_DFSTAB 0x0002 /* need to manage in dfstab */
#define SA_FEATURE_ALLOWSUBDIRS 0x0004 /* allow subdirs to be shared */
#define SA_FEATURE_ALLOWPARDIRS 0x0008 /* allow parent dirs to be shared */
#define SA_FEATURE_HAS_SECTIONS 0x0010 /* protocol supports sections */
#define SA_FEATURE_ADD_PROPERTIES 0x0020 /* can add properties */
#define SA_FEATURE_SERVER 0x0040 /* protocol supports server mode */
/*
* legacy files
*/
#define SA_LEGACY_DFSTAB "/etc/dfs/dfstab"
#define SA_LEGACY_SHARETAB "/etc/dfs/sharetab"
/*
* SMF related
*/
#define SA_SVC_FMRI_BASE "svc:/network/shares/group"
/* initialization */
extern sa_handle_t sa_init(int);
extern void sa_fini(sa_handle_t);
extern int sa_update_config(sa_handle_t);
extern char *sa_errorstr(int);
/* protocol names */
extern int sa_get_protocols(char ***);
extern int sa_valid_protocol(char *);
/* group control (create, remove, etc) */
extern sa_group_t sa_create_group(sa_handle_t, char *, int *);
extern int sa_remove_group(sa_group_t);
extern sa_group_t sa_get_group(sa_handle_t, char *);
extern sa_group_t sa_get_next_group(sa_group_t);
extern char *sa_get_group_attr(sa_group_t, char *);
extern int sa_set_group_attr(sa_group_t, char *, char *);
extern sa_group_t sa_get_sub_group(sa_group_t);
extern int sa_valid_group_name(char *);
/* share control */
extern sa_share_t sa_add_share(sa_group_t, char *, int, int *);
extern int sa_check_path(sa_group_t, char *, int);
extern int sa_move_share(sa_group_t, sa_share_t);
extern int sa_remove_share(sa_share_t);
extern sa_share_t sa_get_share(sa_group_t, char *);
extern sa_share_t sa_find_share(sa_handle_t, char *);
extern sa_share_t sa_get_next_share(sa_share_t);
extern char *sa_get_share_attr(sa_share_t, char *);
extern char *sa_get_share_description(sa_share_t);
extern sa_group_t sa_get_parent_group(sa_share_t);
extern int sa_set_share_attr(sa_share_t, char *, char *);
extern int sa_set_share_description(sa_share_t, char *);
extern int sa_enable_share(sa_group_t, char *);
extern int sa_disable_share(sa_share_t, char *);
extern int sa_is_share(void *);
/* resource name related */
extern sa_resource_t sa_find_resource(sa_handle_t, char *);
extern sa_resource_t sa_get_resource(sa_group_t, char *);
extern sa_resource_t sa_get_next_resource(sa_resource_t);
extern sa_share_t sa_get_resource_parent(sa_resource_t);
extern sa_resource_t sa_get_share_resource(sa_share_t, char *);
extern sa_resource_t sa_add_resource(sa_share_t, char *, int, int *);
extern int sa_remove_resource(sa_resource_t);
extern char *sa_get_resource_attr(sa_resource_t, char *);
extern int sa_set_resource_attr(sa_resource_t, char *, char *);
extern int sa_set_resource_description(sa_resource_t, char *);
extern char *sa_get_resource_description(sa_resource_t);
extern int sa_enable_resource(sa_resource_t, char *);
extern int sa_disable_resource(sa_resource_t, char *);
extern int sa_rename_resource(sa_resource_t, char *);
extern void sa_fix_resource_name(char *);
/* data structure free calls */
extern void sa_free_attr_string(char *);
extern void sa_free_share_description(char *);
/* optionset control */
extern sa_optionset_t sa_get_optionset(sa_group_t, char *);
extern sa_optionset_t sa_get_next_optionset(sa_group_t);
extern char *sa_get_optionset_attr(sa_optionset_t, char *);
extern void sa_set_optionset_attr(sa_optionset_t, char *, char *);
extern sa_optionset_t sa_create_optionset(sa_group_t, char *);
extern int sa_destroy_optionset(sa_optionset_t);
extern sa_optionset_t sa_get_derived_optionset(void *, char *, int);
extern void sa_free_derived_optionset(sa_optionset_t);
/* property functions */
extern sa_property_t sa_get_property(sa_optionset_t, char *);
extern sa_property_t sa_get_next_property(sa_group_t);
extern char *sa_get_property_attr(sa_property_t, char *);
extern sa_property_t sa_create_section(char *, char *);
extern void sa_set_section_attr(sa_property_t, char *, char *);
extern sa_property_t sa_create_property(char *, char *);
extern int sa_add_property(void *, sa_property_t);
extern int sa_update_property(sa_property_t, char *);
extern int sa_remove_property(sa_property_t);
extern int sa_commit_properties(sa_optionset_t, int);
extern int sa_valid_property(void *, char *, sa_property_t);
extern int sa_is_persistent(void *);
/* security control */
extern sa_security_t sa_get_security(sa_group_t, char *, char *);
extern sa_security_t sa_get_next_security(sa_security_t);
extern char *sa_get_security_attr(sa_optionset_t, char *);
extern sa_security_t sa_create_security(sa_group_t, char *, char *);
extern int sa_destroy_security(sa_security_t);
extern void sa_set_security_attr(sa_security_t, char *, char *);
extern sa_optionset_t sa_get_all_security_types(void *, char *, int);
extern sa_security_t sa_get_derived_security(void *, char *, char *, int);
extern void sa_free_derived_security(sa_security_t);
/* protocol specific interfaces */
extern int sa_parse_legacy_options(sa_group_t, char *, char *);
extern char *sa_proto_legacy_format(char *, sa_group_t, int);
extern int sa_is_security(char *, char *);
extern sa_protocol_properties_t sa_proto_get_properties(char *);
extern uint64_t sa_proto_get_featureset(char *);
extern sa_property_t sa_get_protocol_section(sa_protocol_properties_t, char *);
extern sa_property_t sa_get_next_protocol_section(sa_property_t, char *);
extern sa_property_t sa_get_protocol_property(sa_protocol_properties_t, char *);
extern sa_property_t sa_get_next_protocol_property(sa_property_t, char *);
extern int sa_set_protocol_property(sa_property_t, char *, char *);
extern char *sa_get_protocol_status(char *);
extern void sa_format_free(char *);
extern sa_protocol_properties_t sa_create_protocol_properties(char *);
extern int sa_add_protocol_property(sa_protocol_properties_t, sa_property_t);
extern int sa_proto_valid_prop(char *, sa_property_t, sa_optionset_t);
extern int sa_proto_valid_space(char *, char *);
extern char *sa_proto_space_alias(char *, char *);
extern int sa_proto_get_transients(sa_handle_t, char *);
extern int sa_proto_notify_resource(sa_resource_t, char *);
extern int sa_proto_change_notify(sa_share_t, char *);
extern int sa_proto_delete_section(char *, char *);
/* handle legacy (dfstab/sharetab) files */
extern int sa_delete_legacy(sa_share_t, char *);
extern int sa_update_legacy(sa_share_t, char *);
extern int sa_update_sharetab(sa_share_t, char *);
extern int sa_delete_sharetab(sa_handle_t, char *, char *);
/* ZFS functions */
extern int sa_zfs_is_shared(sa_handle_t, char *);
extern int sa_group_is_zfs(sa_group_t);
extern int sa_path_is_zfs(char *);
/* SA Handle specific functions */
extern sa_handle_t sa_find_group_handle(sa_group_t);
#ifdef __cplusplus
}
#endif
#endif /* _LIBSHARE_H */
#endif /* HAVE_LIBSHARE */
+35
View File
@@ -0,0 +1,35 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include_next <mntent.h>
#ifndef _PORT_MNTENT_H
#define _PORT_MNTENT_H
/* For HAVE_SETMNTENT */
#include "zfs_config.h"
#endif
+38
View File
@@ -0,0 +1,38 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include_next <stdlib.h>
#ifndef _PORT_STDLIB_H
#define _PORT_STDLIB_H
#include "zfs_config.h"
#ifndef HAVE_GETEXECNAME
/*
 * Fallback declaration used when HAVE_GETEXECNAME is not defined.
 * Declared with a (void) prototype rather than an empty parameter list so
 * that calls passing arguments are diagnosed at compile time.
 */
extern const char *getexecname(void);
#endif
#endif
+46
View File
@@ -0,0 +1,46 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include_next <string.h>
#ifndef _PORT_STRING_H
#define _PORT_STRING_H
#include "zfs_config.h"
#ifndef HAVE_STRLCPY
extern size_t strlcpy(char *dst, const char *src, size_t len);
#endif
#ifndef HAVE_STRLCAT
extern size_t strlcat(char *, const char *, size_t);
#endif
#ifndef HAVE_STRNLEN
extern size_t strnlen(const char *src, size_t maxlen);
#endif
#endif
+38
View File
@@ -0,0 +1,38 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include_next <strings.h>
#ifndef _PORT_STRINGS_H
#define _PORT_STRINGS_H
#include "zfs_config.h"
#ifndef HAVE_STRCMP_IN_STRINGS_H
#include <string.h>
#endif
#endif
+37
View File
@@ -0,0 +1,37 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include_next <stropts.h>
#ifndef _PORT_STROPTS_H
#define _PORT_STROPTS_H
#include "zfs_config.h"
#ifdef HAVE_IOCTL_IN_STROPTS_H
#include <fake_ioctl.h>
#endif
#endif /* _PORT_STROPTS_H */
+3
View File
@@ -0,0 +1,3 @@
DISTFILES = byteorder.h debug.h efi_partition.h ioctl.h isa_defs.h
DISTFILES += policy.h socket.h swap.h systeminfo.h systm.h time.h
DISTFILES += types.h u8_textprep.h u8_textprep_data.h
+31
View File
@@ -0,0 +1,31 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include "zfs_config.h"
#ifdef HAVE_SYS_BYTEORDER_H
#include_next <sys/byteorder.h>
#endif
+47
View File
@@ -0,0 +1,47 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _PORT_SYS_DEBUG_H
#define _PORT_SYS_DEBUG_H
#include <assert.h>
/*
 * zp_verify(EX) evaluates EX and, when it is false, calls an assertion
 * failure routine with the expression text, file and line. Which routine
 * is called (__assert_c99, __assert or _assert) depends on the compiler's
 * __STDC__ / __STDC_VERSION__ settings. Unlike assert(), none of the three
 * expansions below is conditional on NDEBUG.
 * NOTE(review): the three failure routines are presumably supplied by the
 * Solaris C library -- confirm they exist on every target platform.
 * NOTE(review): the non-__STDC__ variant passes the literal "EX";
 * presumably it relies on pre-ANSI preprocessors substituting macro
 * parameters inside string literals -- confirm.
 */
/* This definition is copied from assert.h. */
#if defined(__STDC__)
#if __STDC_VERSION__ - 0 >= 199901L
#define zp_verify(EX) (void)((EX) || \
(__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
#else
#define zp_verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
#endif /* __STDC_VERSION__ - 0 >= 199901L */
#else
#define zp_verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
#endif /* __STDC__ */
/* VERIFY() maps to zp_verify(); ASSERT() maps to the standard assert(). */
#define VERIFY(EX) zp_verify(EX)
#define ASSERT(EX) assert(EX)
#endif /* _PORT_SYS_DEBUG_H */
@@ -0,0 +1,37 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include "zfs_config.h"
#ifdef HAVE_LIBEFI
#include_next <sys/efi_partition.h>
#ifndef EFI_MIN_RESV_SIZE
#define EFI_MIN_RESV_SIZE (16 * 1024)
#endif
#endif
+37
View File
@@ -0,0 +1,37 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include_next <sys/ioctl.h>
#ifndef _PORT_SYS_IOCTL_H
#define _PORT_SYS_IOCTL_H
#include "zfs_config.h"
#ifdef HAVE_IOCTL_IN_SYS_IOCTL_H
#include <fake_ioctl.h>
#endif
#endif /* _PORT_SYS_IOCTL_H */
+31
View File
@@ -0,0 +1,31 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include "zfs_config.h"
#ifdef HAVE_SYS_ISA_DEFS_H
#include_next <sys/isa_defs.h>
#endif
+40
View File
@@ -0,0 +1,40 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _PORT_SYS_POLICY_H
#define _PORT_SYS_POLICY_H
#ifdef WANT_KERNEL_EMUL
/*
 * Stub versions of the secpolicy_*() checks, defined only when
 * WANT_KERNEL_EMUL is set. Each macro discards its arguments (they are
 * never evaluated, so side effects in them are lost) and expands to the
 * constant 0.
 */
#define secpolicy_fs_unmount(c,vfs) (0)
#define secpolicy_nfs(c) (0)
#define secpolicy_sys_config(c,co) (0)
#define secpolicy_zfs(c) (0)
#define secpolicy_zinject(c) (0)
#endif /* WANT_KERNEL_EMUL */
#endif /* _PORT_SYS_POLICY_H */
+37
View File
@@ -0,0 +1,37 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include_next <sys/socket.h>
#ifndef _PORT_SYS_SOCKET_H
#define _PORT_SYS_SOCKET_H
/* Solaris doesn't have MSG_NOSIGNAL */
#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL 0
#endif
#endif
+32
View File
@@ -0,0 +1,32 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _PORT_SYS_SWAP_H
#define _PORT_SYS_SWAP_H
typedef int swaptbl_t;
#endif
+40
View File
@@ -0,0 +1,40 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include "zfs_config.h"
#ifdef HAVE_SYS_SYSTEMINFO_H
#include_next <sys/systeminfo.h>
#endif
#ifndef _PORT_SYS_SYSTEMINFO_H
#define _PORT_SYS_SYSTEMINFO_H
/*
 * When HAVE_SYSINFO_IN_SYSTEMINFO_H is not defined, sysinfo() becomes a
 * macro that ignores (does not evaluate) its three arguments and yields -1.
 */
#ifndef HAVE_SYSINFO_IN_SYSTEMINFO_H
#define sysinfo(cmd,buf,cnt) (-1)
#endif
#endif /* _PORT_SYS_SYSTEMINFO_H */
+40
View File
@@ -0,0 +1,40 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _PORT_SYS_SYSTM_H
#define _PORT_SYS_SYSTM_H
#ifdef WANT_KERNEL_EMUL
#include <sys/dmu_ctl.h>
/*
 * Under WANT_KERNEL_EMUL the copyinstr/xcopyin/xcopyout names are mapped
 * onto the dctls_* routines, forwarding every argument unchanged and in
 * the same order.
 * NOTE(review): the dctls_* routines are presumably declared by
 * <sys/dmu_ctl.h> -- confirm.
 */
#define copyinstr(from,to,max,len) dctls_copyinstr(from,to,max,len)
#define xcopyin(src,dest,size) dctls_copyin(src,dest,size)
#define xcopyout(src,dest,size) dctls_copyout(src,dest,size)
#endif /* WANT_KERNEL_EMUL */
#endif /* _PORT_SYS_SYSTM_H */
+31
View File
@@ -0,0 +1,31 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include_next <sys/time.h>
#ifndef NANOSEC
#define NANOSEC 1000000000
#endif
+49
View File
@@ -0,0 +1,49 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include_next <sys/types.h>
#ifndef _PORT_SYS_TYPES_H
#define _PORT_SYS_TYPES_H
#include "zfs_config.h"
/*
 * When HAVE_INTTYPES is not defined, pull in <inttypes.h> and declare the
 * boolean_t enum plus the short unsigned / long long type aliases below.
 * NOTE(review): presumably HAVE_INTTYPES means the system <sys/types.h>
 * already provides these names -- confirm against the configure check.
 */
#ifndef HAVE_INTTYPES
#include <inttypes.h>
typedef enum boolean { B_FALSE, B_TRUE } boolean_t;
typedef unsigned char uchar_t;
typedef unsigned short ushort_t;
typedef unsigned int uint_t;
typedef unsigned long ulong_t;
typedef long long longlong_t;
typedef unsigned long long u_longlong_t;
#endif /* HAVE_INTTYPES */
#endif /* _PORT_SYS_TYPES_H */
+119
View File
@@ -0,0 +1,119 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include "zfs_config.h"
#ifdef HAVE_UNICODE
#include_next <sys/u8_textprep.h>
#else
#ifndef _SYS_U8_TEXTPREP_H
#define _SYS_U8_TEXTPREP_H
#include <sys/isa_defs.h>
#include <sys/types.h>
#include <sys/errno.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Unicode encoding conversion functions and their macros.
*/
#define UCONV_IN_BIG_ENDIAN 0x0001
#define UCONV_OUT_BIG_ENDIAN 0x0002
#define UCONV_IN_SYSTEM_ENDIAN 0x0004
#define UCONV_OUT_SYSTEM_ENDIAN 0x0008
#define UCONV_IN_LITTLE_ENDIAN 0x0010
#define UCONV_OUT_LITTLE_ENDIAN 0x0020
#define UCONV_IGNORE_NULL 0x0040
#define UCONV_IN_ACCEPT_BOM 0x0080
#define UCONV_OUT_EMIT_BOM 0x0100
extern int uconv_u16tou32(const uint16_t *, size_t *, uint32_t *, size_t *,
int);
extern int uconv_u16tou8(const uint16_t *, size_t *, uchar_t *, size_t *, int);
extern int uconv_u32tou16(const uint32_t *, size_t *, uint16_t *, size_t *,
int);
extern int uconv_u32tou8(const uint32_t *, size_t *, uchar_t *, size_t *, int);
extern int uconv_u8tou16(const uchar_t *, size_t *, uint16_t *, size_t *, int);
extern int uconv_u8tou32(const uchar_t *, size_t *, uint32_t *, size_t *, int);
/*
* UTF-8 text preparation functions and their macros.
*
* Among the macros defined, U8_CANON_DECOMP, U8_COMPAT_DECOMP, and
* U8_CANON_COMP are not public interfaces and must not be used directly
* at the flag input argument.
*/
#define U8_STRCMP_CS (0x00000001)
#define U8_STRCMP_CI_UPPER (0x00000002)
#define U8_STRCMP_CI_LOWER (0x00000004)
#define U8_CANON_DECOMP (0x00000010)
#define U8_COMPAT_DECOMP (0x00000020)
#define U8_CANON_COMP (0x00000040)
#define U8_STRCMP_NFD (U8_CANON_DECOMP)
#define U8_STRCMP_NFC (U8_CANON_DECOMP | U8_CANON_COMP)
#define U8_STRCMP_NFKD (U8_COMPAT_DECOMP)
#define U8_STRCMP_NFKC (U8_COMPAT_DECOMP | U8_CANON_COMP)
#define U8_TEXTPREP_TOUPPER (U8_STRCMP_CI_UPPER)
#define U8_TEXTPREP_TOLOWER (U8_STRCMP_CI_LOWER)
#define U8_TEXTPREP_NFD (U8_STRCMP_NFD)
#define U8_TEXTPREP_NFC (U8_STRCMP_NFC)
#define U8_TEXTPREP_NFKD (U8_STRCMP_NFKD)
#define U8_TEXTPREP_NFKC (U8_STRCMP_NFKC)
#define U8_TEXTPREP_IGNORE_NULL (0x00010000)
#define U8_TEXTPREP_IGNORE_INVALID (0x00020000)
#define U8_TEXTPREP_NOWAIT (0x00040000)
#define U8_UNICODE_320 (0)
#define U8_UNICODE_500 (1)
#define U8_UNICODE_LATEST (U8_UNICODE_500)
#define U8_VALIDATE_ENTIRE (0x00100000)
#define U8_VALIDATE_CHECK_ADDITIONAL (0x00200000)
#define U8_VALIDATE_UCS2_RANGE (0x00400000)
#define U8_ILLEGAL_CHAR (-1)
#define U8_OUT_OF_RANGE_CHAR (-2)
extern int u8_validate(char *, size_t, char **, int, int *);
extern int u8_strcmp(const char *, const char *, size_t, int, size_t, int *);
extern size_t u8_textprep_str(char *, size_t *, char *, size_t *, int, size_t,
int *);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_U8_TEXTPREP_H */
#endif /* HAVE_UNICODE */
File diff suppressed because it is too large Load Diff
+53
View File
@@ -0,0 +1,53 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include_next <unistd.h>
#ifndef _PORT_UNISTD_H
#define _PORT_UNISTD_H
#include "zfs_config.h"
#ifndef HAVE_ISSETUGID
#include <sys/types.h>
#define issetugid() (geteuid() == 0 || getegid() == 0)
#endif
#ifdef HAVE_IOCTL_IN_UNISTD_H
#include <fake_ioctl.h>
#endif
#if !defined(__sun__) && !defined(__sun)
/* It seems Solaris only returns positive host ids */
/*
 * Wrapper around gethostid() that folds a negative host id onto its
 * absolute value, so the result handed back is the magnitude of whatever
 * the C library reported.
 */
static inline long fake_gethostid()
{
	long hostid = gethostid();

	if (hostid < 0)
		return -hostid;
	return hostid;
}
#define gethostid() fake_gethostid()
#endif
#endif /* _PORT_UNISTD_H */
+63
View File
@@ -0,0 +1,63 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <limits.h>
#include "zfs_config.h"
#ifndef HAVE_GETEXECNAME
/*
 * Return the path of the running executable, or NULL when it cannot be
 * determined.
 *
 * On Linux the path is read from the /proc/self/exe symlink and cached in
 * a static buffer, so the returned pointer must not be freed or modified.
 * A failed lookup is not cached; the next call tries again. On any other
 * platform this fallback always returns NULL.
 */
const char *getexecname(void)
{
#ifdef __linux__
	static char execname[PATH_MAX + 1];
	/* Must be MT-safe */
	static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
	const char *result = execname;

	pthread_mutex_lock(&mtx);
	if (execname[0] == '\0') {
		/*
		 * readlink() does not NUL-terminate, so offer it one byte
		 * less than the buffer holds.  The size must be taken from
		 * the array itself: sizeof (execname - 1) is the size of a
		 * pointer and would truncate the path to a few bytes.
		 */
		ssize_t rc = readlink("/proc/self/exe", execname,
		    sizeof (execname) - 1);

		if (rc < 0) {
			/* Leave the cache empty so a later call retries. */
			execname[0] = '\0';
			result = NULL;
		} else {
			execname[rc] = '\0';
		}
	}
	pthread_mutex_unlock(&mtx);

	return (result);
#else
	return (NULL);
#endif
}
#endif
+61
View File
@@ -0,0 +1,61 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <string.h>
#include <sys/types.h>
#include "zfs_config.h"
/*
* Appends src to the dstsize buffer at dst. The append will never
* overflow the destination buffer and the buffer will always be null
* terminated. Never reference beyond &dst[dstsize-1] when computing
* the length of the pre-existing string.
*/
#ifndef HAVE_STRLCAT
size_t
strlcat(char *dst, const char *src, size_t dstsize)
{
	size_t srclen = strlen(src);
	size_t dstlen = 0;
	size_t room;
	size_t ncopy;

	/* Measure dst, but never look beyond dst[dstsize - 1]. */
	while (dstlen < dstsize && dst[dstlen] != '\0')
		dstlen++;

	/* No terminator inside the buffer: there is nowhere to append. */
	if (dstlen == dstsize)
		return (dstlen + srclen);

	/* Bytes available for src once the trailing NUL is reserved. */
	room = dstsize - dstlen - 1;
	ncopy = (srclen > room) ? room : srclen;

	(void) memcpy(dst + dstlen, src, ncopy);
	dst[dstlen + ncopy] = '\0';

	/* Length the full concatenation would have had. */
	return (dstlen + srclen);
}
#endif
+57
View File
@@ -0,0 +1,57 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <string.h>
#include <sys/types.h>
#include "zfs_config.h"
/*
* Copies src to the dstsize buffer at dst. The copy will never
* overflow the destination buffer and the buffer will always be null
* terminated.
*/
#ifndef HAVE_STRLCPY
size_t
strlcpy(char *dst, const char *src, size_t len)
{
	const size_t srclen = strlen(src);

	/* A zero-sized destination is left untouched. */
	if (len != 0) {
		/* Keep one byte back for the terminator when truncating. */
		size_t ncopy = (srclen < len) ? srclen : len - 1;

		(void) memcpy(dst, src, ncopy);
		dst[ncopy] = '\0';
	}

	/* Always report the length of the untruncated source. */
	return (srclen);
}
#endif
+50
View File
@@ -0,0 +1,50 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc.
* All rights reserved. Use is subject to license terms.
*/
#include <string.h>
#include <sys/types.h>
#include "zfs_config.h"
/*
* Returns the number of non-NULL bytes in string argument,
* but not more than maxlen. Does not look past str + maxlen.
*/
#ifndef HAVE_STRNLEN
size_t
strnlen(const char *str, size_t maxlen)
{
	/* Search only the first maxlen bytes for the terminator. */
	const char *nul = memchr(str, 0, maxlen);

	return (nul != NULL ? (size_t)(nul - str) : maxlen);
}
#endif
File diff suppressed because it is too large Load Diff
+22
View File
@@ -0,0 +1,22 @@
subdir-m += amd64 i386 sparc64 include
DISTFILES = atomic_asm_weak.h gen_synonyms.h getmntany.c
DISTFILES += mkdirp.c synonyms.h tsd.h zone.c
LIBRARY := libsolcompat
# Compile as shared library. There's an extra useless host program
# here called 'zu' because it was the easiest way I could convince
# the kernel build system to construct a user space shared library.
HOSTCFLAGS += @HOSTCFLAGS@
HOSTCFLAGS += -I@LIBDIR@/libsolcompat/include
HOSTCFLAGS += -I@LIBDIR@/libport/include
hostprogs-y := zu
always := $(hostprogs-y)
zu-objs := zu.o ${LIBRARY}.so
${LIBRARY}-objs += getmntany.o
${LIBRARY}-objs += mkdirp.o
${LIBRARY}-objs += zone.o
+1
View File
@@ -0,0 +1 @@
DISTFILES = atomic.S
+617
View File
@@ -0,0 +1,617 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
.ident "%Z%%M% %I% %E% SMI"
.file "%M%"
#define _ASM
#include <sys/asm_linkage.h>
#if defined(_KERNEL)
/*
* Legacy kernel interfaces; they will go away (eventually).
*/
ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
#else
/*
* Include the definitions for the libc weak aliases.
*/
#include "../atomic_asm_weak.h"
#endif
/*
 * atomic_inc_{8,16,32,64} and their uchar/ushort/uint/ulong aliases.
 *
 * Each routine adds 1 to the integer whose address is in %rdi using a
 * LOCK-prefixed inc of the matching operand width.  Nothing is returned.
 */
ENTRY(atomic_inc_8)
ALTENTRY(atomic_inc_uchar)
lock
incb (%rdi)
ret
SET_SIZE(atomic_inc_uchar)
SET_SIZE(atomic_inc_8)
/* 16-bit variant. */
ENTRY(atomic_inc_16)
ALTENTRY(atomic_inc_ushort)
lock
incw (%rdi)
ret
SET_SIZE(atomic_inc_ushort)
SET_SIZE(atomic_inc_16)
/* 32-bit variant. */
ENTRY(atomic_inc_32)
ALTENTRY(atomic_inc_uint)
lock
incl (%rdi)
ret
SET_SIZE(atomic_inc_uint)
SET_SIZE(atomic_inc_32)
/* 64-bit variant; atomic_inc_ulong shares this body. */
ENTRY(atomic_inc_64)
ALTENTRY(atomic_inc_ulong)
lock
incq (%rdi)
ret
SET_SIZE(atomic_inc_ulong)
SET_SIZE(atomic_inc_64)
/*
 * atomic_inc_{8,16,32,64}_nv and aliases: increment the integer at (%rdi)
 * and return the new value.
 *
 * Pattern shared by all four widths:
 *   - load the current value into the accumulator (%al/%ax/%eax/%rax);
 *   - compute old + 1 into %rcx with lea (only the low operand-width bits
 *     of %rcx are used afterwards);
 *   - lock cmpxchg stores the new value only if memory still equals the
 *     accumulator; otherwise it reloads the accumulator from memory and
 *     the loop at 1: retries;
 *   - the new value is returned in %eax/%rax (zero-extended for the 8- and
 *     16-bit variants).
 */
ENTRY(atomic_inc_8_nv)
ALTENTRY(atomic_inc_uchar_nv)
movb (%rdi), %al
1:
leaq 1(%rax), %rcx
lock
cmpxchgb %cl, (%rdi)
jne 1b
movzbl %cl, %eax
ret
SET_SIZE(atomic_inc_uchar_nv)
SET_SIZE(atomic_inc_8_nv)
/* 16-bit variant. */
ENTRY(atomic_inc_16_nv)
ALTENTRY(atomic_inc_ushort_nv)
movw (%rdi), %ax
1:
leaq 1(%rax), %rcx
lock
cmpxchgw %cx, (%rdi)
jne 1b
movzwl %cx, %eax
ret
SET_SIZE(atomic_inc_ushort_nv)
SET_SIZE(atomic_inc_16_nv)
/* 32-bit variant. */
ENTRY(atomic_inc_32_nv)
ALTENTRY(atomic_inc_uint_nv)
movl (%rdi), %eax
1:
leaq 1(%rax), %rcx
lock
cmpxchgl %ecx, (%rdi)
jne 1b
movl %ecx, %eax
ret
SET_SIZE(atomic_inc_uint_nv)
SET_SIZE(atomic_inc_32_nv)
/* 64-bit variant; atomic_inc_ulong_nv shares this body. */
ENTRY(atomic_inc_64_nv)
ALTENTRY(atomic_inc_ulong_nv)
movq (%rdi), %rax
1:
leaq 1(%rax), %rcx
lock
cmpxchgq %rcx, (%rdi)
jne 1b
movq %rcx, %rax
ret
SET_SIZE(atomic_inc_ulong_nv)
SET_SIZE(atomic_inc_64_nv)
/*
 * atomic_dec_{8,16,32,64} and their uchar/ushort/uint/ulong aliases.
 *
 * Each routine subtracts 1 from the integer whose address is in %rdi
 * using a LOCK-prefixed dec of the matching operand width.  Nothing is
 * returned.
 */
ENTRY(atomic_dec_8)
ALTENTRY(atomic_dec_uchar)
lock
decb (%rdi)
ret
SET_SIZE(atomic_dec_uchar)
SET_SIZE(atomic_dec_8)
/* 16-bit variant. */
ENTRY(atomic_dec_16)
ALTENTRY(atomic_dec_ushort)
lock
decw (%rdi)
ret
SET_SIZE(atomic_dec_ushort)
SET_SIZE(atomic_dec_16)
/* 32-bit variant. */
ENTRY(atomic_dec_32)
ALTENTRY(atomic_dec_uint)
lock
decl (%rdi)
ret
SET_SIZE(atomic_dec_uint)
SET_SIZE(atomic_dec_32)
/* 64-bit variant; atomic_dec_ulong shares this body. */
ENTRY(atomic_dec_64)
ALTENTRY(atomic_dec_ulong)
lock
decq (%rdi)
ret
SET_SIZE(atomic_dec_ulong)
SET_SIZE(atomic_dec_64)
/*
 * atomic_dec_{8,16,32,64}_nv and aliases: decrement the integer at (%rdi)
 * and return the new value.
 *
 * Same compare-and-swap loop as the increment-and-return routines, except
 * that lea computes old - 1 into %rcx.  A failed cmpxchg reloads the
 * accumulator from memory and the loop at 1: retries.  The new value is
 * returned in %eax/%rax (zero-extended for the 8- and 16-bit variants).
 */
ENTRY(atomic_dec_8_nv)
ALTENTRY(atomic_dec_uchar_nv)
movb (%rdi), %al
1:
leaq -1(%rax), %rcx
lock
cmpxchgb %cl, (%rdi)
jne 1b
movzbl %cl, %eax
ret
SET_SIZE(atomic_dec_uchar_nv)
SET_SIZE(atomic_dec_8_nv)
/* 16-bit variant. */
ENTRY(atomic_dec_16_nv)
ALTENTRY(atomic_dec_ushort_nv)
movw (%rdi), %ax
1:
leaq -1(%rax), %rcx
lock
cmpxchgw %cx, (%rdi)
jne 1b
movzwl %cx, %eax
ret
SET_SIZE(atomic_dec_ushort_nv)
SET_SIZE(atomic_dec_16_nv)
/* 32-bit variant. */
ENTRY(atomic_dec_32_nv)
ALTENTRY(atomic_dec_uint_nv)
movl (%rdi), %eax
1:
leaq -1(%rax), %rcx
lock
cmpxchgl %ecx, (%rdi)
jne 1b
movl %ecx, %eax
ret
SET_SIZE(atomic_dec_uint_nv)
SET_SIZE(atomic_dec_32_nv)
/* 64-bit variant; atomic_dec_ulong_nv shares this body. */
ENTRY(atomic_dec_64_nv)
ALTENTRY(atomic_dec_ulong_nv)
movq (%rdi), %rax
1:
leaq -1(%rax), %rcx
lock
cmpxchgq %rcx, (%rdi)
jne 1b
movq %rcx, %rax
ret
SET_SIZE(atomic_dec_ulong_nv)
SET_SIZE(atomic_dec_64_nv)
/*
 * atomic_add_{8,16,32,64} and aliases: add the delta held in
 * %sil/%si/%esi/%rsi to the integer at (%rdi) with a LOCK-prefixed add.
 * Nothing is returned.
 */
ENTRY(atomic_add_8)
ALTENTRY(atomic_add_char)
lock
addb %sil, (%rdi)
ret
SET_SIZE(atomic_add_char)
SET_SIZE(atomic_add_8)
/* 16-bit variant. */
ENTRY(atomic_add_16)
ALTENTRY(atomic_add_short)
lock
addw %si, (%rdi)
ret
SET_SIZE(atomic_add_short)
SET_SIZE(atomic_add_16)
/* 32-bit variant. */
ENTRY(atomic_add_32)
ALTENTRY(atomic_add_int)
lock
addl %esi, (%rdi)
ret
SET_SIZE(atomic_add_int)
SET_SIZE(atomic_add_32)
/* 64-bit variant; atomic_add_ptr and atomic_add_long share this body. */
ENTRY(atomic_add_64)
ALTENTRY(atomic_add_ptr)
ALTENTRY(atomic_add_long)
lock
addq %rsi, (%rdi)
ret
SET_SIZE(atomic_add_long)
SET_SIZE(atomic_add_ptr)
SET_SIZE(atomic_add_64)
/*
 * atomic_or_{8,16,32,64} and aliases: OR the bits held in
 * %sil/%si/%esi/%rsi into the integer at (%rdi) with a LOCK-prefixed or.
 * Nothing is returned.
 */
ENTRY(atomic_or_8)
ALTENTRY(atomic_or_uchar)
lock
orb %sil, (%rdi)
ret
SET_SIZE(atomic_or_uchar)
SET_SIZE(atomic_or_8)
/* 16-bit variant. */
ENTRY(atomic_or_16)
ALTENTRY(atomic_or_ushort)
lock
orw %si, (%rdi)
ret
SET_SIZE(atomic_or_ushort)
SET_SIZE(atomic_or_16)
/* 32-bit variant. */
ENTRY(atomic_or_32)
ALTENTRY(atomic_or_uint)
lock
orl %esi, (%rdi)
ret
SET_SIZE(atomic_or_uint)
SET_SIZE(atomic_or_32)
/* 64-bit variant; atomic_or_ulong shares this body. */
ENTRY(atomic_or_64)
ALTENTRY(atomic_or_ulong)
lock
orq %rsi, (%rdi)
ret
SET_SIZE(atomic_or_ulong)
SET_SIZE(atomic_or_64)
/*
 * atomic_and_{8,16,32,64} and aliases: AND the mask held in
 * %sil/%si/%esi/%rsi into the integer at (%rdi) with a LOCK-prefixed and.
 * Nothing is returned.
 */
ENTRY(atomic_and_8)
ALTENTRY(atomic_and_uchar)
lock
andb %sil, (%rdi)
ret
SET_SIZE(atomic_and_uchar)
SET_SIZE(atomic_and_8)
/* 16-bit variant. */
ENTRY(atomic_and_16)
ALTENTRY(atomic_and_ushort)
lock
andw %si, (%rdi)
ret
SET_SIZE(atomic_and_ushort)
SET_SIZE(atomic_and_16)
/* 32-bit variant. */
ENTRY(atomic_and_32)
ALTENTRY(atomic_and_uint)
lock
andl %esi, (%rdi)
ret
SET_SIZE(atomic_and_uint)
SET_SIZE(atomic_and_32)
/* 64-bit variant; atomic_and_ulong shares this body. */
ENTRY(atomic_and_64)
ALTENTRY(atomic_and_ulong)
lock
andq %rsi, (%rdi)
ret
SET_SIZE(atomic_and_ulong)
SET_SIZE(atomic_and_64)
/*
 * atomic_add_{8,16,32,64}_nv and aliases: add the delta in
 * %sil/%si/%esi/%rsi to the integer at (%rdi) and return the new value.
 *
 * Each routine loads the current value into the accumulator, then loops:
 * copy the delta into the %rcx sub-register, add the accumulator to it,
 * and try to commit with lock cmpxchg.  A failed cmpxchg reloads the
 * accumulator from memory, so the sum is recomputed from the fresh value
 * on the next pass.  The new value is returned in %eax/%rax
 * (zero-extended for the 8- and 16-bit variants).
 */
ENTRY(atomic_add_8_nv)
ALTENTRY(atomic_add_char_nv)
movb (%rdi), %al
1:
movb %sil, %cl
addb %al, %cl
lock
cmpxchgb %cl, (%rdi)
jne 1b
movzbl %cl, %eax
ret
SET_SIZE(atomic_add_char_nv)
SET_SIZE(atomic_add_8_nv)
/* 16-bit variant. */
ENTRY(atomic_add_16_nv)
ALTENTRY(atomic_add_short_nv)
movw (%rdi), %ax
1:
movw %si, %cx
addw %ax, %cx
lock
cmpxchgw %cx, (%rdi)
jne 1b
movzwl %cx, %eax
ret
SET_SIZE(atomic_add_short_nv)
SET_SIZE(atomic_add_16_nv)
/* 32-bit variant. */
ENTRY(atomic_add_32_nv)
ALTENTRY(atomic_add_int_nv)
movl (%rdi), %eax
1:
movl %esi, %ecx
addl %eax, %ecx
lock
cmpxchgl %ecx, (%rdi)
jne 1b
movl %ecx, %eax
ret
SET_SIZE(atomic_add_int_nv)
SET_SIZE(atomic_add_32_nv)
/* 64-bit variant; atomic_add_ptr_nv and atomic_add_long_nv share it. */
ENTRY(atomic_add_64_nv)
ALTENTRY(atomic_add_ptr_nv)
ALTENTRY(atomic_add_long_nv)
movq (%rdi), %rax
1:
movq %rsi, %rcx
addq %rax, %rcx
lock
cmpxchgq %rcx, (%rdi)
jne 1b
movq %rcx, %rax
ret
SET_SIZE(atomic_add_long_nv)
SET_SIZE(atomic_add_ptr_nv)
SET_SIZE(atomic_add_64_nv)
/*
 * atomic_and_{8,16,32,64}_nv and aliases: AND the mask in
 * %sil/%si/%esi/%rsi into the integer at (%rdi) and return the new value.
 *
 * Compare-and-swap loop: the accumulator holds the value last seen in
 * memory, the %rcx sub-register holds (mask & old), and lock cmpxchg
 * commits it only if memory is unchanged; otherwise the accumulator is
 * refreshed and the loop at 1: retries.  The new value is returned in
 * %eax/%rax (zero-extended for the 8- and 16-bit variants).
 */
ENTRY(atomic_and_8_nv)
ALTENTRY(atomic_and_uchar_nv)
movb (%rdi), %al
1:
movb %sil, %cl
andb %al, %cl
lock
cmpxchgb %cl, (%rdi)
jne 1b
movzbl %cl, %eax
ret
SET_SIZE(atomic_and_uchar_nv)
SET_SIZE(atomic_and_8_nv)
/* 16-bit variant. */
ENTRY(atomic_and_16_nv)
ALTENTRY(atomic_and_ushort_nv)
movw (%rdi), %ax
1:
movw %si, %cx
andw %ax, %cx
lock
cmpxchgw %cx, (%rdi)
jne 1b
movzwl %cx, %eax
ret
SET_SIZE(atomic_and_ushort_nv)
SET_SIZE(atomic_and_16_nv)
/* 32-bit variant. */
ENTRY(atomic_and_32_nv)
ALTENTRY(atomic_and_uint_nv)
movl (%rdi), %eax
1:
movl %esi, %ecx
andl %eax, %ecx
lock
cmpxchgl %ecx, (%rdi)
jne 1b
movl %ecx, %eax
ret
SET_SIZE(atomic_and_uint_nv)
SET_SIZE(atomic_and_32_nv)
/* 64-bit variant; atomic_and_ulong_nv shares this body. */
ENTRY(atomic_and_64_nv)
ALTENTRY(atomic_and_ulong_nv)
movq (%rdi), %rax
1:
movq %rsi, %rcx
andq %rax, %rcx
lock
cmpxchgq %rcx, (%rdi)
jne 1b
movq %rcx, %rax
ret
SET_SIZE(atomic_and_ulong_nv)
SET_SIZE(atomic_and_64_nv)
/*
 * atomic_or_8_nv / atomic_or_uchar_nv: OR the bits in %sil into the byte
 * at (%rdi) and return the new value.
 *
 * %al holds the value last seen in memory and %cl holds (bits | old).
 * lock cmpxchgb commits %cl only if memory still equals %al; otherwise it
 * reloads %al from memory and the loop at 1: retries.  The new value is
 * returned zero-extended in %eax.
 *
 * The SET_SIZE directives must name this routine's own symbols so that
 * they receive the correct size.
 */
ENTRY(atomic_or_8_nv)
ALTENTRY(atomic_or_uchar_nv)
movb (%rdi), %al
1:
movb %sil, %cl
orb %al, %cl
lock
cmpxchgb %cl, (%rdi)
jne 1b
movzbl %cl, %eax
ret
SET_SIZE(atomic_or_uchar_nv)
SET_SIZE(atomic_or_8_nv)
/*
 * atomic_or_{16,32,64}_nv and aliases: OR the bits in %si/%esi/%rsi into
 * the integer at (%rdi) and return the new value.
 *
 * Compare-and-swap loop: the accumulator holds the value last seen in
 * memory, the %rcx sub-register holds (bits | old), and lock cmpxchg
 * commits it only if memory is unchanged; otherwise the accumulator is
 * refreshed and the loop at 1: retries.  The new value is returned in
 * %eax/%rax (zero-extended for the 16-bit variant).
 */
ENTRY(atomic_or_16_nv)
ALTENTRY(atomic_or_ushort_nv)
movw (%rdi), %ax
1:
movw %si, %cx
orw %ax, %cx
lock
cmpxchgw %cx, (%rdi)
jne 1b
movzwl %cx, %eax
ret
SET_SIZE(atomic_or_ushort_nv)
SET_SIZE(atomic_or_16_nv)
/* 32-bit variant. */
ENTRY(atomic_or_32_nv)
ALTENTRY(atomic_or_uint_nv)
movl (%rdi), %eax
1:
movl %esi, %ecx
orl %eax, %ecx
lock
cmpxchgl %ecx, (%rdi)
jne 1b
movl %ecx, %eax
ret
SET_SIZE(atomic_or_uint_nv)
SET_SIZE(atomic_or_32_nv)
/* 64-bit variant; atomic_or_ulong_nv shares this body. */
ENTRY(atomic_or_64_nv)
ALTENTRY(atomic_or_ulong_nv)
movq (%rdi), %rax
1:
movq %rsi, %rcx
orq %rax, %rcx
lock
cmpxchgq %rcx, (%rdi)
jne 1b
movq %rcx, %rax
ret
SET_SIZE(atomic_or_ulong_nv)
SET_SIZE(atomic_or_64_nv)
/*
 * atomic_cas_{8,16,32,64} and aliases: compare-and-swap.
 *
 * %rdi is the target address, the %rsi sub-register the expected value
 * and the %rdx sub-register the replacement.  The expected value is moved
 * into the accumulator (zero-extended for the 8- and 16-bit variants);
 * lock cmpxchg then stores the replacement only if memory equals the
 * accumulator, and otherwise loads the current memory value into the
 * accumulator.  In both cases the accumulator ends up holding the value
 * that was in memory, which is what gets returned.
 */
ENTRY(atomic_cas_8)
ALTENTRY(atomic_cas_uchar)
movzbl %sil, %eax
lock
cmpxchgb %dl, (%rdi)
ret
SET_SIZE(atomic_cas_uchar)
SET_SIZE(atomic_cas_8)
/* 16-bit variant. */
ENTRY(atomic_cas_16)
ALTENTRY(atomic_cas_ushort)
movzwl %si, %eax
lock
cmpxchgw %dx, (%rdi)
ret
SET_SIZE(atomic_cas_ushort)
SET_SIZE(atomic_cas_16)
/* 32-bit variant. */
ENTRY(atomic_cas_32)
ALTENTRY(atomic_cas_uint)
movl %esi, %eax
lock
cmpxchgl %edx, (%rdi)
ret
SET_SIZE(atomic_cas_uint)
SET_SIZE(atomic_cas_32)
/* 64-bit variant; atomic_cas_ulong and atomic_cas_ptr share this body. */
ENTRY(atomic_cas_64)
ALTENTRY(atomic_cas_ulong)
ALTENTRY(atomic_cas_ptr)
movq %rsi, %rax
lock
cmpxchgq %rdx, (%rdi)
ret
SET_SIZE(atomic_cas_ptr)
SET_SIZE(atomic_cas_ulong)
SET_SIZE(atomic_cas_64)
/*
 * atomic_swap_{8,16,32,64} and aliases: unconditionally store the value
 * from the %rsi sub-register into the integer at (%rdi) and return the
 * previous contents.
 *
 * The new value is placed in the accumulator (zero-extended for the 8-
 * and 16-bit variants) and exchanged with memory by xchg, which leaves
 * the old memory value in the accumulator as the return value.
 */
ENTRY(atomic_swap_8)
ALTENTRY(atomic_swap_uchar)
movzbl %sil, %eax
lock
xchgb %al, (%rdi)
ret
SET_SIZE(atomic_swap_uchar)
SET_SIZE(atomic_swap_8)
/* 16-bit variant. */
ENTRY(atomic_swap_16)
ALTENTRY(atomic_swap_ushort)
movzwl %si, %eax
lock
xchgw %ax, (%rdi)
ret
SET_SIZE(atomic_swap_ushort)
SET_SIZE(atomic_swap_16)
/* 32-bit variant. */
ENTRY(atomic_swap_32)
ALTENTRY(atomic_swap_uint)
movl %esi, %eax
lock
xchgl %eax, (%rdi)
ret
SET_SIZE(atomic_swap_uint)
SET_SIZE(atomic_swap_32)
/* 64-bit variant; atomic_swap_ulong and atomic_swap_ptr share it. */
ENTRY(atomic_swap_64)
ALTENTRY(atomic_swap_ulong)
ALTENTRY(atomic_swap_ptr)
movq %rsi, %rax
lock
xchgq %rax, (%rdi)
ret
SET_SIZE(atomic_swap_ptr)
SET_SIZE(atomic_swap_ulong)
SET_SIZE(atomic_swap_64)
/*
 * atomic_set_long_excl: atomically set bit number %rsi of the long at
 * (%rdi) with lock bts.
 *
 * %eax is pre-cleared to 0.  bts copies the previous bit value into the
 * carry flag: if the bit was clear (carry clear) the routine returns 0;
 * if the bit was already set, %eax is decremented and -1 is returned.
 */
ENTRY(atomic_set_long_excl)
xorl %eax, %eax
lock
btsq %rsi, (%rdi)
jnc 1f
decl %eax
1:
ret
SET_SIZE(atomic_set_long_excl)
/*
 * atomic_clear_long_excl: atomically clear bit number %rsi of the long at
 * (%rdi) with lock btr.
 *
 * Returns 0 if the bit was previously set (carry set), or -1 if it was
 * already clear.
 */
ENTRY(atomic_clear_long_excl)
xorl %eax, %eax
lock
btrq %rsi, (%rdi)
jc 1f
decl %eax
1:
ret
SET_SIZE(atomic_clear_long_excl)
#if !defined(_KERNEL)
/*
 * Memory barrier routines; only assembled when _KERNEL is not defined.
 *
 * NOTE: membar_enter and membar_exit are identical routines.
 * We define them separately, instead of using ALTENTRY
 * definitions to alias them together, so that DTrace and
 * debuggers will see a unique address for each of them,
 * allowing more accurate tracing.
 */
/* membar_enter: issues mfence. */
ENTRY(membar_enter)
mfence
ret
SET_SIZE(membar_enter)
/* membar_exit: issues mfence. */
ENTRY(membar_exit)
mfence
ret
SET_SIZE(membar_exit)
/* membar_producer: issues sfence. */
ENTRY(membar_producer)
sfence
ret
SET_SIZE(membar_producer)
/* membar_consumer: issues lfence. */
ENTRY(membar_consumer)
lfence
ret
SET_SIZE(membar_consumer)
#endif /* !_KERNEL */
#ifdef __ELF__
.section .note.GNU-stack,"",%progbits
#endif
View File
+94
View File
@@ -0,0 +1,94 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright 2006 Ricardo Correia. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
#include <stdio.h>
#include <string.h>
#include <mntent.h>
#include <sys/mnttab.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#define BUFSIZE (MNT_LINE_MAX + 2)
__thread char buf[BUFSIZE];
#define DIFF(xx) (mrefp->xx != NULL && (mgetp->xx == NULL || strcmp(mrefp->xx, mgetp->xx) != 0))
/*
 * Read entries from fp until one matches the reference entry mrefp.
 *
 * Only the non-NULL fields of mrefp (mnt_special, mnt_mountp, mnt_fstype,
 * mnt_mntopts) take part in the comparison; see the DIFF() macro.
 *
 * Returns 0 with the matching entry stored in *mgetp, or the first
 * non-zero status reported by _sol_getmntent() (end of file or error).
 */
int
getmntany(FILE *fp, struct mnttab *mgetp, struct mnttab *mrefp)
{
	for (;;) {
		int status = _sol_getmntent(fp, mgetp);

		/* End of file or read failure: hand the status to the caller. */
		if (status != 0)
			return status;

		/* Skip this entry as soon as any requested field differs. */
		if (DIFF(mnt_special))
			continue;
		if (DIFF(mnt_mountp))
			continue;
		if (DIFF(mnt_fstype))
			continue;
		if (DIFF(mnt_mntopts))
			continue;

		return status;
	}
}
/*
 * Read the next mount entry from fp with getmntent_r() and copy its
 * device, mount point, type and option strings into *mgetp.
 *
 * getmntent_r() is given the thread-local buf as scratch space, so the
 * strings stored in *mgetp are only valid until the next call on the
 * same thread.
 *
 * Returns 0 on success, -1 if no entry was read and fp is at end of
 * file, and MNT_TOOLONG for any other failure.
 */
int
_sol_getmntent(FILE *fp, struct mnttab *mgetp)
{
	struct mntent entry;

	if (getmntent_r(fp, &entry, buf, BUFSIZE) == NULL)
		return feof(fp) ? -1 : MNT_TOOLONG;

	mgetp->mnt_special = entry.mnt_fsname;
	mgetp->mnt_mountp = entry.mnt_dir;
	mgetp->mnt_fstype = entry.mnt_type;
	mgetp->mnt_mntopts = entry.mnt_opts;

	return 0;
}
int getextmntent(FILE *fp, struct extmnttab *mp, int len)
{
int ret;
struct stat64 st;
ret = _sol_getmntent(fp, (struct mnttab *) mp);
if(ret == 0) {
if(stat64(mp->mnt_mountp, &st) != 0) {
mp->mnt_major = 0;
mp->mnt_minor = 0;
return ret;
}
mp->mnt_major = major(st.st_dev);
mp->mnt_minor = minor(st.st_dev);
}
return ret;
}
+1
View File
@@ -0,0 +1 @@
DISTFILES = atomic.S
+752
View File
@@ -0,0 +1,752 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
.ident "%Z%%M% %I% %E% SMI"
.file "%M%"
#define _ASM
#include <sys/asm_linkage.h>
#if defined(_KERNEL)
/*
* Legacy kernel interfaces; they will go away (eventually).
*/
ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
#else
/*
* Include the definitions for the libc weak aliases.
*/
#include "../atomic_asm_weak.h"
#endif
/*
 * atomic_inc_{8,16,32} and their uchar/ushort/uint/ulong aliases.
 *
 * The target address is loaded from the first stack argument at 4(%esp)
 * and the integer there is incremented with a LOCK-prefixed inc of the
 * matching width.  Nothing is returned.
 */
ENTRY(atomic_inc_8)
ALTENTRY(atomic_inc_uchar)
movl 4(%esp), %eax
lock
incb (%eax)
ret
SET_SIZE(atomic_inc_uchar)
SET_SIZE(atomic_inc_8)
/* 16-bit variant. */
ENTRY(atomic_inc_16)
ALTENTRY(atomic_inc_ushort)
movl 4(%esp), %eax
lock
incw (%eax)
ret
SET_SIZE(atomic_inc_ushort)
SET_SIZE(atomic_inc_16)
/* 32-bit variant; atomic_inc_uint and atomic_inc_ulong share this body. */
ENTRY(atomic_inc_32)
ALTENTRY(atomic_inc_uint)
ALTENTRY(atomic_inc_ulong)
movl 4(%esp), %eax
lock
incl (%eax)
ret
SET_SIZE(atomic_inc_ulong)
SET_SIZE(atomic_inc_uint)
SET_SIZE(atomic_inc_32)
/*
 * atomic_inc_{8,16,32}_nv and aliases: increment the integer whose
 * address is at 4(%esp) and return the new value.
 *
 * %edx holds the target address and the accumulator (%al/%ax/%eax) the
 * value last seen in memory.  lea computes old + 1 into %ecx and lock
 * cmpxchg commits it only if memory is unchanged; on failure the
 * accumulator is reloaded from memory and the loop at 1: retries.  The
 * new value is returned in %eax (zero-extended for the 8- and 16-bit
 * variants).
 */
ENTRY(atomic_inc_8_nv)
ALTENTRY(atomic_inc_uchar_nv)
movl 4(%esp), %edx
movb (%edx), %al
1:
leal 1(%eax), %ecx
lock
cmpxchgb %cl, (%edx)
jne 1b
movzbl %cl, %eax
ret
SET_SIZE(atomic_inc_uchar_nv)
SET_SIZE(atomic_inc_8_nv)
/* 16-bit variant. */
ENTRY(atomic_inc_16_nv)
ALTENTRY(atomic_inc_ushort_nv)
movl 4(%esp), %edx
movw (%edx), %ax
1:
leal 1(%eax), %ecx
lock
cmpxchgw %cx, (%edx)
jne 1b
movzwl %cx, %eax
ret
SET_SIZE(atomic_inc_ushort_nv)
SET_SIZE(atomic_inc_16_nv)
/* 32-bit variant; the uint and ulong aliases share this body. */
ENTRY(atomic_inc_32_nv)
ALTENTRY(atomic_inc_uint_nv)
ALTENTRY(atomic_inc_ulong_nv)
movl 4(%esp), %edx
movl (%edx), %eax
1:
leal 1(%eax), %ecx
lock
cmpxchgl %ecx, (%edx)
jne 1b
movl %ecx, %eax
ret
SET_SIZE(atomic_inc_ulong_nv)
SET_SIZE(atomic_inc_uint_nv)
SET_SIZE(atomic_inc_32_nv)
/*
 * atomic_inc_64 / atomic_inc_64_nv: increment the 64-bit integer whose
 * address is the first argument and return the new value in %edx:%eax.
 *
 * %edi and %ebx are saved on the stack first, so the argument is read
 * from 12(%esp).  %edx:%eax holds the value last seen in memory; each
 * pass builds old + 1 in %ecx:%ebx (add/adc carries into the high word)
 * and lock cmpxchg8b commits it only if memory still equals %edx:%eax.
 * On failure cmpxchg8b reloads %edx:%eax from memory and the loop at 1:
 * retries.
 *
 * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
 * separated, you need to also edit the libc i386 platform
 * specific mapfile and remove the NODYNSORT attribute
 * from atomic_inc_64_nv.
 */
ENTRY(atomic_inc_64)
ALTENTRY(atomic_inc_64_nv)
pushl %edi
pushl %ebx
movl 12(%esp), %edi
movl (%edi), %eax
movl 4(%edi), %edx
1:
xorl %ebx, %ebx
xorl %ecx, %ecx
incl %ebx
addl %eax, %ebx
adcl %edx, %ecx
lock
cmpxchg8b (%edi)
jne 1b
movl %ebx, %eax
movl %ecx, %edx
popl %ebx
popl %edi
ret
SET_SIZE(atomic_inc_64_nv)
SET_SIZE(atomic_inc_64)
/*
 * atomic_dec_{8,16,32} and their uchar/ushort/uint/ulong aliases.
 *
 * The target address is loaded from the first stack argument at 4(%esp)
 * and the integer there is decremented with a LOCK-prefixed dec of the
 * matching width.  Nothing is returned.
 */
ENTRY(atomic_dec_8)
ALTENTRY(atomic_dec_uchar)
movl 4(%esp), %eax
lock
decb (%eax)
ret
SET_SIZE(atomic_dec_uchar)
SET_SIZE(atomic_dec_8)
/* 16-bit variant. */
ENTRY(atomic_dec_16)
ALTENTRY(atomic_dec_ushort)
movl 4(%esp), %eax
lock
decw (%eax)
ret
SET_SIZE(atomic_dec_ushort)
SET_SIZE(atomic_dec_16)
/* 32-bit variant; atomic_dec_uint and atomic_dec_ulong share this body. */
ENTRY(atomic_dec_32)
ALTENTRY(atomic_dec_uint)
ALTENTRY(atomic_dec_ulong)
movl 4(%esp), %eax
lock
decl (%eax)
ret
SET_SIZE(atomic_dec_ulong)
SET_SIZE(atomic_dec_uint)
SET_SIZE(atomic_dec_32)
/*
 * atomic_dec_{8,16,32}_nv and aliases: decrement the integer whose
 * address is at 4(%esp) and return the new value.
 *
 * Same compare-and-swap loop as the increment-and-return routines, with
 * lea computing old - 1 into %ecx.  On a failed cmpxchg the accumulator
 * is reloaded from memory and the loop at 1: retries.  The new value is
 * returned in %eax (zero-extended for the 8- and 16-bit variants).
 */
ENTRY(atomic_dec_8_nv)
ALTENTRY(atomic_dec_uchar_nv)
movl 4(%esp), %edx
movb (%edx), %al
1:
leal -1(%eax), %ecx
lock
cmpxchgb %cl, (%edx)
jne 1b
movzbl %cl, %eax
ret
SET_SIZE(atomic_dec_uchar_nv)
SET_SIZE(atomic_dec_8_nv)
/* 16-bit variant. */
ENTRY(atomic_dec_16_nv)
ALTENTRY(atomic_dec_ushort_nv)
movl 4(%esp), %edx
movw (%edx), %ax
1:
leal -1(%eax), %ecx
lock
cmpxchgw %cx, (%edx)
jne 1b
movzwl %cx, %eax
ret
SET_SIZE(atomic_dec_ushort_nv)
SET_SIZE(atomic_dec_16_nv)
/* 32-bit variant; the uint and ulong aliases share this body. */
ENTRY(atomic_dec_32_nv)
ALTENTRY(atomic_dec_uint_nv)
ALTENTRY(atomic_dec_ulong_nv)
movl 4(%esp), %edx
movl (%edx), %eax
1:
leal -1(%eax), %ecx
lock
cmpxchgl %ecx, (%edx)
jne 1b
movl %ecx, %eax
ret
SET_SIZE(atomic_dec_ulong_nv)
SET_SIZE(atomic_dec_uint_nv)
SET_SIZE(atomic_dec_32_nv)
/*
 * atomic_dec_64 / atomic_dec_64_nv: decrement the 64-bit integer whose
 * address is the first argument and return the new value in %edx:%eax.
 *
 * %edi and %ebx are saved on the stack first, so the argument is read
 * from 12(%esp).  Each pass sets %ecx:%ebx to all ones (xor then not,
 * i.e. -1), adds the value last seen in %edx:%eax with add/adc, and
 * tries to commit the result with lock cmpxchg8b.  On failure cmpxchg8b
 * reloads %edx:%eax from memory and the loop at 1: retries.
 *
 * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
 * separated, it is important to edit the libc i386 platform
 * specific mapfile and remove the NODYNSORT attribute
 * from atomic_dec_64_nv.
 */
ENTRY(atomic_dec_64)
ALTENTRY(atomic_dec_64_nv)
pushl %edi
pushl %ebx
movl 12(%esp), %edi
movl (%edi), %eax
movl 4(%edi), %edx
1:
xorl %ebx, %ebx
xorl %ecx, %ecx
not %ecx
not %ebx
addl %eax, %ebx
adcl %edx, %ecx
lock
cmpxchg8b (%edi)
jne 1b
movl %ebx, %eax
movl %ecx, %edx
popl %ebx
popl %edi
ret
SET_SIZE(atomic_dec_64_nv)
SET_SIZE(atomic_dec_64)
/*
 * atomic_add_{8,16,32} and aliases: add a delta to an integer in memory.
 *
 * The target address is read from 4(%esp) and the delta from 8(%esp);
 * the low 8/16/32 bits of the delta are added to the target with a
 * LOCK-prefixed add.  Nothing is returned.
 */
ENTRY(atomic_add_8)
ALTENTRY(atomic_add_char)
movl 4(%esp), %eax
movl 8(%esp), %ecx
lock
addb %cl, (%eax)
ret
SET_SIZE(atomic_add_char)
SET_SIZE(atomic_add_8)
/* 16-bit variant. */
ENTRY(atomic_add_16)
ALTENTRY(atomic_add_short)
movl 4(%esp), %eax
movl 8(%esp), %ecx
lock
addw %cx, (%eax)
ret
SET_SIZE(atomic_add_short)
SET_SIZE(atomic_add_16)
/* 32-bit variant; the int, ptr and long aliases share this body. */
ENTRY(atomic_add_32)
ALTENTRY(atomic_add_int)
ALTENTRY(atomic_add_ptr)
ALTENTRY(atomic_add_long)
movl 4(%esp), %eax
movl 8(%esp), %ecx
lock
addl %ecx, (%eax)
ret
SET_SIZE(atomic_add_long)
SET_SIZE(atomic_add_ptr)
SET_SIZE(atomic_add_int)
SET_SIZE(atomic_add_32)
/*
 * atomic_or_{8,16,32} and aliases: OR bits into an integer in memory.
 *
 * The target address is read from 4(%esp) and the bits from 8(%esp);
 * they are merged into the target with a LOCK-prefixed or of the
 * matching width.  Nothing is returned.
 */
ENTRY(atomic_or_8)
ALTENTRY(atomic_or_uchar)
movl 4(%esp), %eax
movb 8(%esp), %cl
lock
orb %cl, (%eax)
ret
SET_SIZE(atomic_or_uchar)
SET_SIZE(atomic_or_8)
/* 16-bit variant. */
ENTRY(atomic_or_16)
ALTENTRY(atomic_or_ushort)
movl 4(%esp), %eax
movw 8(%esp), %cx
lock
orw %cx, (%eax)
ret
SET_SIZE(atomic_or_ushort)
SET_SIZE(atomic_or_16)
/* 32-bit variant; atomic_or_uint and atomic_or_ulong share this body. */
ENTRY(atomic_or_32)
ALTENTRY(atomic_or_uint)
ALTENTRY(atomic_or_ulong)
movl 4(%esp), %eax
movl 8(%esp), %ecx
lock
orl %ecx, (%eax)
ret
SET_SIZE(atomic_or_ulong)
SET_SIZE(atomic_or_uint)
SET_SIZE(atomic_or_32)
/*
 * atomic_and_{8,16,32} and aliases: AND a mask into an integer in memory.
 *
 * The target address is read from 4(%esp) and the mask from 8(%esp);
 * the mask is applied to the target with a LOCK-prefixed and of the
 * matching width.  Nothing is returned.
 */
ENTRY(atomic_and_8)
ALTENTRY(atomic_and_uchar)
movl 4(%esp), %eax
movb 8(%esp), %cl
lock
andb %cl, (%eax)
ret
SET_SIZE(atomic_and_uchar)
SET_SIZE(atomic_and_8)
/* 16-bit variant. */
ENTRY(atomic_and_16)
ALTENTRY(atomic_and_ushort)
movl 4(%esp), %eax
movw 8(%esp), %cx
lock
andw %cx, (%eax)
ret
SET_SIZE(atomic_and_ushort)
SET_SIZE(atomic_and_16)
/* 32-bit variant; atomic_and_uint and atomic_and_ulong share this body. */
ENTRY(atomic_and_32)
ALTENTRY(atomic_and_uint)
ALTENTRY(atomic_and_ulong)
movl 4(%esp), %eax
movl 8(%esp), %ecx
lock
andl %ecx, (%eax)
ret
SET_SIZE(atomic_and_ulong)
SET_SIZE(atomic_and_uint)
SET_SIZE(atomic_and_32)
/*
 * atomic_add_{8,16,32}_nv and aliases: add the delta at 8(%esp) to the
 * integer whose address is at 4(%esp) and return the new value.
 *
 * %edx holds the target address and the accumulator the value last seen
 * in memory.  Each pass reloads the delta into %ecx, adds the
 * accumulator to it, and tries to commit with lock cmpxchg; on failure
 * the accumulator is refreshed from memory and the loop at 1: retries.
 * The new value is returned in %eax (zero-extended for the 8- and 16-bit
 * variants).
 */
ENTRY(atomic_add_8_nv)
ALTENTRY(atomic_add_char_nv)
movl 4(%esp), %edx
movb (%edx), %al
1:
movl 8(%esp), %ecx
addb %al, %cl
lock
cmpxchgb %cl, (%edx)
jne 1b
movzbl %cl, %eax
ret
SET_SIZE(atomic_add_char_nv)
SET_SIZE(atomic_add_8_nv)
/* 16-bit variant. */
ENTRY(atomic_add_16_nv)
ALTENTRY(atomic_add_short_nv)
movl 4(%esp), %edx
movw (%edx), %ax
1:
movl 8(%esp), %ecx
addw %ax, %cx
lock
cmpxchgw %cx, (%edx)
jne 1b
movzwl %cx, %eax
ret
SET_SIZE(atomic_add_short_nv)
SET_SIZE(atomic_add_16_nv)
/* 32-bit variant; the int, ptr and long aliases share this body. */
ENTRY(atomic_add_32_nv)
ALTENTRY(atomic_add_int_nv)
ALTENTRY(atomic_add_ptr_nv)
ALTENTRY(atomic_add_long_nv)
movl 4(%esp), %edx
movl (%edx), %eax
1:
movl 8(%esp), %ecx
addl %eax, %ecx
lock
cmpxchgl %ecx, (%edx)
jne 1b
movl %ecx, %eax
ret
SET_SIZE(atomic_add_long_nv)
SET_SIZE(atomic_add_ptr_nv)
SET_SIZE(atomic_add_int_nv)
SET_SIZE(atomic_add_32_nv)
/*
 * atomic_add_64 / atomic_add_64_nv: add a 64-bit delta to the 64-bit
 * integer whose address is the first argument and return the new value
 * in %edx:%eax.
 *
 * %edi and %ebx are saved first, so the target address is at 12(%esp)
 * and the delta's low/high words at 16(%esp)/20(%esp).  Each pass loads
 * the delta into %ecx:%ebx, adds the value last seen in %edx:%eax with
 * add/adc, and tries to commit with lock cmpxchg8b.  On failure
 * cmpxchg8b reloads %edx:%eax from memory and the loop at 1: retries.
 *
 * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
 * separated, it is important to edit the libc i386 platform
 * specific mapfile and remove the NODYNSORT attribute
 * from atomic_add_64_nv.
 */
ENTRY(atomic_add_64)
ALTENTRY(atomic_add_64_nv)
pushl %edi
pushl %ebx
movl 12(%esp), %edi
movl (%edi), %eax
movl 4(%edi), %edx
1:
movl 16(%esp), %ebx
movl 20(%esp), %ecx
addl %eax, %ebx
adcl %edx, %ecx
lock
cmpxchg8b (%edi)
jne 1b
movl %ebx, %eax
movl %ecx, %edx
popl %ebx
popl %edi
ret
SET_SIZE(atomic_add_64_nv)
SET_SIZE(atomic_add_64)
/*
 * atomic_or_{8,16,32}_nv and aliases: OR the bits at 8(%esp) into the
 * integer whose address is at 4(%esp) and return the new value.
 *
 * %edx holds the target address and the accumulator the value last seen
 * in memory.  Each pass reloads the bits into %ecx, ORs in the
 * accumulator, and tries to commit with lock cmpxchg; on failure the
 * accumulator is refreshed from memory and the loop at 1: retries.  The
 * new value is returned in %eax (zero-extended for the 8- and 16-bit
 * variants).
 */
ENTRY(atomic_or_8_nv)
ALTENTRY(atomic_or_uchar_nv)
movl 4(%esp), %edx
movb (%edx), %al
1:
movl 8(%esp), %ecx
orb %al, %cl
lock
cmpxchgb %cl, (%edx)
jne 1b
movzbl %cl, %eax
ret
SET_SIZE(atomic_or_uchar_nv)
SET_SIZE(atomic_or_8_nv)
/* 16-bit variant. */
ENTRY(atomic_or_16_nv)
ALTENTRY(atomic_or_ushort_nv)
movl 4(%esp), %edx
movw (%edx), %ax
1:
movl 8(%esp), %ecx
orw %ax, %cx
lock
cmpxchgw %cx, (%edx)
jne 1b
movzwl %cx, %eax
ret
SET_SIZE(atomic_or_ushort_nv)
SET_SIZE(atomic_or_16_nv)
/* 32-bit variant; the uint and ulong aliases share this body. */
ENTRY(atomic_or_32_nv)
ALTENTRY(atomic_or_uint_nv)
ALTENTRY(atomic_or_ulong_nv)
movl 4(%esp), %edx
movl (%edx), %eax
1:
movl 8(%esp), %ecx
orl %eax, %ecx
lock
cmpxchgl %ecx, (%edx)
jne 1b
movl %ecx, %eax
ret
SET_SIZE(atomic_or_ulong_nv)
SET_SIZE(atomic_or_uint_nv)
SET_SIZE(atomic_or_32_nv)
/*
 * atomic_or_64 / atomic_or_64_nv: OR a 64-bit value into the 64-bit
 * integer whose address is the first argument and return the new value
 * in %edx:%eax.
 *
 * %edi and %ebx are saved first, so the target address is at 12(%esp)
 * and the operand's low/high words at 16(%esp)/20(%esp).  Each pass
 * loads the operand into %ecx:%ebx, ORs in the value last seen in
 * %edx:%eax word by word, and tries to commit with lock cmpxchg8b.  On
 * failure cmpxchg8b reloads %edx:%eax from memory and the loop at 1:
 * retries.
 *
 * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
 * separated, it is important to edit the libc i386 platform
 * specific mapfile and remove the NODYNSORT attribute
 * from atomic_or_64_nv.
 */
ENTRY(atomic_or_64)
ALTENTRY(atomic_or_64_nv)
pushl %edi
pushl %ebx
movl 12(%esp), %edi
movl (%edi), %eax
movl 4(%edi), %edx
1:
movl 16(%esp), %ebx
movl 20(%esp), %ecx
orl %eax, %ebx
orl %edx, %ecx
lock
cmpxchg8b (%edi)
jne 1b
movl %ebx, %eax
movl %ecx, %edx
popl %ebx
popl %edi
ret
SET_SIZE(atomic_or_64_nv)
SET_SIZE(atomic_or_64)
/*
 * atomic_and_{8,16,32}_nv and aliases: AND the mask at 8(%esp) into the
 * integer whose address is at 4(%esp) and return the new value.
 *
 * %edx holds the target address and the accumulator the value last seen
 * in memory.  Each pass reloads the mask into %ecx, ANDs in the
 * accumulator, and tries to commit with lock cmpxchg; on failure the
 * accumulator is refreshed from memory and the loop at 1: retries.  The
 * new value is returned in %eax (zero-extended for the 8- and 16-bit
 * variants).
 */
ENTRY(atomic_and_8_nv)
ALTENTRY(atomic_and_uchar_nv)
movl 4(%esp), %edx
movb (%edx), %al
1:
movl 8(%esp), %ecx
andb %al, %cl
lock
cmpxchgb %cl, (%edx)
jne 1b
movzbl %cl, %eax
ret
SET_SIZE(atomic_and_uchar_nv)
SET_SIZE(atomic_and_8_nv)
/* 16-bit variant. */
ENTRY(atomic_and_16_nv)
ALTENTRY(atomic_and_ushort_nv)
movl 4(%esp), %edx
movw (%edx), %ax
1:
movl 8(%esp), %ecx
andw %ax, %cx
lock
cmpxchgw %cx, (%edx)
jne 1b
movzwl %cx, %eax
ret
SET_SIZE(atomic_and_ushort_nv)
SET_SIZE(atomic_and_16_nv)
/* 32-bit variant; the uint and ulong aliases share this body. */
ENTRY(atomic_and_32_nv)
ALTENTRY(atomic_and_uint_nv)
ALTENTRY(atomic_and_ulong_nv)
movl 4(%esp), %edx
movl (%edx), %eax
1:
movl 8(%esp), %ecx
andl %eax, %ecx
lock
cmpxchgl %ecx, (%edx)
jne 1b
movl %ecx, %eax
ret
SET_SIZE(atomic_and_ulong_nv)
SET_SIZE(atomic_and_uint_nv)
SET_SIZE(atomic_and_32_nv)
/*
 * atomic_and_64 / atomic_and_64_nv: AND a 64-bit mask into the 64-bit
 * integer whose address is the first argument and return the new value
 * in %edx:%eax.
 *
 * %edi and %ebx are saved first, so the target address is at 12(%esp)
 * and the mask's low/high words at 16(%esp)/20(%esp).  Each pass loads
 * the mask into %ecx:%ebx, ANDs in the value last seen in %edx:%eax word
 * by word, and tries to commit with lock cmpxchg8b.  On failure
 * cmpxchg8b reloads %edx:%eax from memory and the loop at 1: retries.
 *
 * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
 * separated, it is important to edit the libc i386 platform
 * specific mapfile and remove the NODYNSORT attribute
 * from atomic_and_64_nv.
 */
ENTRY(atomic_and_64)
ALTENTRY(atomic_and_64_nv)
pushl %edi
pushl %ebx
movl 12(%esp), %edi
movl (%edi), %eax
movl 4(%edi), %edx
1:
movl 16(%esp), %ebx
movl 20(%esp), %ecx
andl %eax, %ebx
andl %edx, %ecx
lock
cmpxchg8b (%edi)
jne 1b
movl %ebx, %eax
movl %ecx, %edx
popl %ebx
popl %edi
ret
SET_SIZE(atomic_and_64_nv)
SET_SIZE(atomic_and_64)
/*
 * atomic_cas_{8,16,32} and aliases: compare-and-swap.
 *
 * Stack arguments: target address at 4(%esp), expected value at 8(%esp),
 * replacement at 12(%esp).  The expected value is loaded into the
 * accumulator (zero-extended for the 8- and 16-bit variants); lock
 * cmpxchg stores the replacement only if memory equals the accumulator
 * and otherwise loads the current memory value into it.  Either way the
 * accumulator ends up holding the value that was in memory, which is
 * returned.
 */
ENTRY(atomic_cas_8)
ALTENTRY(atomic_cas_uchar)
movl 4(%esp), %edx
movzbl 8(%esp), %eax
movb 12(%esp), %cl
lock
cmpxchgb %cl, (%edx)
ret
SET_SIZE(atomic_cas_uchar)
SET_SIZE(atomic_cas_8)
/* 16-bit variant. */
ENTRY(atomic_cas_16)
ALTENTRY(atomic_cas_ushort)
movl 4(%esp), %edx
movzwl 8(%esp), %eax
movw 12(%esp), %cx
lock
cmpxchgw %cx, (%edx)
ret
SET_SIZE(atomic_cas_ushort)
SET_SIZE(atomic_cas_16)
/* 32-bit variant; the uint, ulong and ptr aliases share this body. */
ENTRY(atomic_cas_32)
ALTENTRY(atomic_cas_uint)
ALTENTRY(atomic_cas_ulong)
ALTENTRY(atomic_cas_ptr)
movl 4(%esp), %edx
movl 8(%esp), %eax
movl 12(%esp), %ecx
lock
cmpxchgl %ecx, (%edx)
ret
SET_SIZE(atomic_cas_ptr)
SET_SIZE(atomic_cas_ulong)
SET_SIZE(atomic_cas_uint)
SET_SIZE(atomic_cas_32)
/*
 * atomic_cas_64: 64-bit compare-and-swap using cmpxchg8b.
 *
 * %ebx and %esi are saved first, so the target address is at 12(%esp),
 * the expected value at 16(%esp)/20(%esp) (loaded into %edx:%eax) and
 * the replacement at 24(%esp)/28(%esp) (loaded into %ecx:%ebx).  lock
 * cmpxchg8b stores the replacement only if memory equals %edx:%eax and
 * otherwise loads the current memory value into %edx:%eax, so the value
 * that was in memory is returned in %edx:%eax.
 */
ENTRY(atomic_cas_64)
pushl %ebx
pushl %esi
movl 12(%esp), %esi
movl 16(%esp), %eax
movl 20(%esp), %edx
movl 24(%esp), %ebx
movl 28(%esp), %ecx
lock
cmpxchg8b (%esi)
popl %esi
popl %ebx
ret
SET_SIZE(atomic_cas_64)
/*
 * atomic_swap_{8,16,32} and aliases: unconditionally store the value at
 * 8(%esp) into the integer whose address is at 4(%esp) and return the
 * previous contents.
 *
 * The new value is loaded into the accumulator (zero-extended for the
 * 8- and 16-bit variants) and exchanged with memory by xchg, leaving the
 * old memory value in the accumulator as the return value.
 */
ENTRY(atomic_swap_8)
ALTENTRY(atomic_swap_uchar)
movl 4(%esp), %edx
movzbl 8(%esp), %eax
lock
xchgb %al, (%edx)
ret
SET_SIZE(atomic_swap_uchar)
SET_SIZE(atomic_swap_8)
/* 16-bit variant. */
ENTRY(atomic_swap_16)
ALTENTRY(atomic_swap_ushort)
movl 4(%esp), %edx
movzwl 8(%esp), %eax
lock
xchgw %ax, (%edx)
ret
SET_SIZE(atomic_swap_ushort)
SET_SIZE(atomic_swap_16)
/* 32-bit variant; the uint, ptr and ulong aliases share this body. */
ENTRY(atomic_swap_32)
ALTENTRY(atomic_swap_uint)
ALTENTRY(atomic_swap_ptr)
ALTENTRY(atomic_swap_ulong)
movl 4(%esp), %edx
movl 8(%esp), %eax
lock
xchgl %eax, (%edx)
ret
SET_SIZE(atomic_swap_ulong)
SET_SIZE(atomic_swap_ptr)
SET_SIZE(atomic_swap_uint)
SET_SIZE(atomic_swap_32)
/*
 * atomic_swap_64: store a 64-bit value and return the previous contents
 * in %edx:%eax.
 *
 * %esi and %ebx are saved first, so the target address is at 12(%esp)
 * and the new value at 16(%esp)/20(%esp) (loaded into %ecx:%ebx).  The
 * current memory value is read into %edx:%eax and lock cmpxchg8b is
 * retried until it succeeds; each failure refreshes %edx:%eax from
 * memory, so on exit %edx:%eax holds the value that was replaced.
 */
ENTRY(atomic_swap_64)
pushl %esi
pushl %ebx
movl 12(%esp), %esi
movl 16(%esp), %ebx
movl 20(%esp), %ecx
movl (%esi), %eax
movl 4(%esi), %edx
1:
lock
cmpxchg8b (%esi)
jne 1b
popl %ebx
popl %esi
ret
SET_SIZE(atomic_swap_64)
/*
 * atomic_set_long_excl: atomically set the bit whose number is at
 * 8(%esp) in the long whose address is at 4(%esp), using lock bts.
 *
 * %eax is pre-cleared to 0.  bts copies the previous bit value into the
 * carry flag: if the bit was clear the routine returns 0; if it was
 * already set, %eax is decremented and -1 is returned.
 */
ENTRY(atomic_set_long_excl)
movl 4(%esp), %edx
movl 8(%esp), %ecx
xorl %eax, %eax
lock
btsl %ecx, (%edx)
jnc 1f
decl %eax
1:
ret
SET_SIZE(atomic_set_long_excl)
/*
 * atomic_clear_long_excl: atomically clear the bit whose number is at
 * 8(%esp) in the long whose address is at 4(%esp), using lock btr.
 *
 * Returns 0 if the bit was previously set (carry set), or -1 if it was
 * already clear.
 */
ENTRY(atomic_clear_long_excl)
movl 4(%esp), %edx
movl 8(%esp), %ecx
xorl %eax, %eax
lock
btrl %ecx, (%edx)
jc 1f
decl %eax
1:
ret
SET_SIZE(atomic_clear_long_excl)
#if !defined(_KERNEL)
/*
 * Memory barrier routines; only assembled when _KERNEL is not defined.
 *
 * Each routine performs a LOCK-prefixed xor of 0 with the word at the
 * top of the stack: a locked read-modify-write that leaves the memory
 * contents unchanged.
 *
 * NOTE: membar_enter, membar_exit, membar_producer, and
 * membar_consumer are all identical routines. We define them
 * separately, instead of using ALTENTRY definitions to alias them
 * together, so that DTrace and debuggers will see a unique address
 * for them, allowing more accurate tracing.
 */
ENTRY(membar_enter)
lock
xorl $0, (%esp)
ret
SET_SIZE(membar_enter)
ENTRY(membar_exit)
lock
xorl $0, (%esp)
ret
SET_SIZE(membar_exit)
ENTRY(membar_producer)
lock
xorl $0, (%esp)
ret
SET_SIZE(membar_producer)
ENTRY(membar_consumer)
lock
xorl $0, (%esp)
ret
SET_SIZE(membar_consumer)
#endif /* !_KERNEL */
#ifdef __ELF__
.section .note.GNU-stack,"",%progbits
#endif
+4
View File
@@ -0,0 +1,4 @@
subdir-m += amd64 i386 ia32 rpc sparc64 sys tsol
DISTFILES = assert.h atomic.h devid.h dirent.h libc.h libdevinfo.h libgen.h
DISTFILES += mtlib.h priv.h stdarg.h stdio_ext.h strings.h ucred.h zone.h
@@ -0,0 +1 @@
subdir-m += sys
@@ -0,0 +1,2 @@
DISTFILES = asm_linkage.h
@@ -0,0 +1,5 @@
/*
 * amd64 wrapper for asm_linkage.h: make sure __amd64 is defined, then
 * pull in the shared ia32 header.
 */
#ifndef __amd64
#define __amd64
#endif
#include <ia32/sys/asm_linkage.h>
+42
View File
@@ -0,0 +1,42 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SOL_ASSERT_H
#define _SOL_ASSERT_H
#include_next <assert.h>
#include <stdio.h>
#include <stdlib.h>
#ifndef __assert_c99
/*
 * Report a failed assertion and terminate the process.
 *
 * Writes "<file>:<line>: <func>: Assertion `<expr>` failed." to stderr
 * and then calls abort(); it never returns to the caller.  Only defined
 * when no __assert_c99 macro already exists.
 */
static inline void
__assert_c99(const char *expr, const char *file, int line, const char *func)
{
	fprintf(stderr, "%s:%i: %s: Assertion `%s` failed.\n",
	    file, line, func, expr);
	abort();
}
#endif
#endif
+438
View File
@@ -0,0 +1,438 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ATOMIC_H
#define _SYS_ATOMIC_H
#include <sys/types.h>
#include <sys/inttypes.h>
#ifdef __cplusplus
extern "C" {
#endif
#if defined(_KERNEL) && defined(__GNUC__) && defined(_ASM_INLINES) && \
(defined(__i386) || defined(__amd64))
#include <asm/atomic.h>
#endif
#if defined(_KERNEL) || defined(__STDC__)
/*
* Increment target.
*/
extern void atomic_inc_8(volatile uint8_t *);
extern void atomic_inc_uchar(volatile uchar_t *);
extern void atomic_inc_16(volatile uint16_t *);
extern void atomic_inc_ushort(volatile ushort_t *);
extern void atomic_inc_32(volatile uint32_t *);
extern void atomic_inc_uint(volatile uint_t *);
extern void atomic_inc_ulong(volatile ulong_t *);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern void atomic_inc_64(volatile uint64_t *);
#endif
/*
* Decrement target
*/
extern void atomic_dec_8(volatile uint8_t *);
extern void atomic_dec_uchar(volatile uchar_t *);
extern void atomic_dec_16(volatile uint16_t *);
extern void atomic_dec_ushort(volatile ushort_t *);
extern void atomic_dec_32(volatile uint32_t *);
extern void atomic_dec_uint(volatile uint_t *);
extern void atomic_dec_ulong(volatile ulong_t *);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern void atomic_dec_64(volatile uint64_t *);
#endif
/*
* Add delta to target
*/
extern void atomic_add_8(volatile uint8_t *, int8_t);
extern void atomic_add_char(volatile uchar_t *, signed char);
extern void atomic_add_16(volatile uint16_t *, int16_t);
extern void atomic_add_short(volatile ushort_t *, short);
extern void atomic_add_32(volatile uint32_t *, int32_t);
extern void atomic_add_int(volatile uint_t *, int);
extern void atomic_add_ptr(volatile void *, ssize_t);
extern void atomic_add_long(volatile ulong_t *, long);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern void atomic_add_64(volatile uint64_t *, int64_t);
#endif
/*
* logical OR bits with target
*/
extern void atomic_or_8(volatile uint8_t *, uint8_t);
extern void atomic_or_uchar(volatile uchar_t *, uchar_t);
extern void atomic_or_16(volatile uint16_t *, uint16_t);
extern void atomic_or_ushort(volatile ushort_t *, ushort_t);
extern void atomic_or_32(volatile uint32_t *, uint32_t);
extern void atomic_or_uint(volatile uint_t *, uint_t);
extern void atomic_or_ulong(volatile ulong_t *, ulong_t);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern void atomic_or_64(volatile uint64_t *, uint64_t);
#endif
/*
* logical AND bits with target
*/
extern void atomic_and_8(volatile uint8_t *, uint8_t);
extern void atomic_and_uchar(volatile uchar_t *, uchar_t);
extern void atomic_and_16(volatile uint16_t *, uint16_t);
extern void atomic_and_ushort(volatile ushort_t *, ushort_t);
extern void atomic_and_32(volatile uint32_t *, uint32_t);
extern void atomic_and_uint(volatile uint_t *, uint_t);
extern void atomic_and_ulong(volatile ulong_t *, ulong_t);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern void atomic_and_64(volatile uint64_t *, uint64_t);
#endif
/*
* As above, but return the new value. Note that these _nv() variants are
* substantially more expensive on some platforms than the no-return-value
* versions above, so don't use them unless you really need to know the
* new value *atomically* (e.g. when decrementing a reference count and
* checking whether it went to zero).
*/
/*
* Increment target and return new value.
*/
extern uint8_t atomic_inc_8_nv(volatile uint8_t *);
extern uchar_t atomic_inc_uchar_nv(volatile uchar_t *);
extern uint16_t atomic_inc_16_nv(volatile uint16_t *);
extern ushort_t atomic_inc_ushort_nv(volatile ushort_t *);
extern uint32_t atomic_inc_32_nv(volatile uint32_t *);
extern uint_t atomic_inc_uint_nv(volatile uint_t *);
extern ulong_t atomic_inc_ulong_nv(volatile ulong_t *);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern uint64_t atomic_inc_64_nv(volatile uint64_t *);
#endif
/*
* Decrement target and return new value.
*/
extern uint8_t atomic_dec_8_nv(volatile uint8_t *);
extern uchar_t atomic_dec_uchar_nv(volatile uchar_t *);
extern uint16_t atomic_dec_16_nv(volatile uint16_t *);
extern ushort_t atomic_dec_ushort_nv(volatile ushort_t *);
extern uint32_t atomic_dec_32_nv(volatile uint32_t *);
extern uint_t atomic_dec_uint_nv(volatile uint_t *);
extern ulong_t atomic_dec_ulong_nv(volatile ulong_t *);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern uint64_t atomic_dec_64_nv(volatile uint64_t *);
#endif
/*
* Add delta to target and return new value.
*/
extern uint8_t atomic_add_8_nv(volatile uint8_t *, int8_t);
extern uchar_t atomic_add_char_nv(volatile uchar_t *, signed char);
extern uint16_t atomic_add_16_nv(volatile uint16_t *, int16_t);
extern ushort_t atomic_add_short_nv(volatile ushort_t *, short);
extern uint32_t atomic_add_32_nv(volatile uint32_t *, int32_t);
extern uint_t atomic_add_int_nv(volatile uint_t *, int);
extern void *atomic_add_ptr_nv(volatile void *, ssize_t);
extern ulong_t atomic_add_long_nv(volatile ulong_t *, long);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern uint64_t atomic_add_64_nv(volatile uint64_t *, int64_t);
#endif
/*
* logical OR bits with target and return new value.
*/
extern uint8_t atomic_or_8_nv(volatile uint8_t *, uint8_t);
extern uchar_t atomic_or_uchar_nv(volatile uchar_t *, uchar_t);
extern uint16_t atomic_or_16_nv(volatile uint16_t *, uint16_t);
extern ushort_t atomic_or_ushort_nv(volatile ushort_t *, ushort_t);
extern uint32_t atomic_or_32_nv(volatile uint32_t *, uint32_t);
extern uint_t atomic_or_uint_nv(volatile uint_t *, uint_t);
extern ulong_t atomic_or_ulong_nv(volatile ulong_t *, ulong_t);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern uint64_t atomic_or_64_nv(volatile uint64_t *, uint64_t);
#endif
/*
* logical AND bits with target and return new value.
*/
extern uint8_t atomic_and_8_nv(volatile uint8_t *, uint8_t);
extern uchar_t atomic_and_uchar_nv(volatile uchar_t *, uchar_t);
extern uint16_t atomic_and_16_nv(volatile uint16_t *, uint16_t);
extern ushort_t atomic_and_ushort_nv(volatile ushort_t *, ushort_t);
extern uint32_t atomic_and_32_nv(volatile uint32_t *, uint32_t);
extern uint_t atomic_and_uint_nv(volatile uint_t *, uint_t);
extern ulong_t atomic_and_ulong_nv(volatile ulong_t *, ulong_t);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern uint64_t atomic_and_64_nv(volatile uint64_t *, uint64_t);
#endif
/*
* If *arg1 == arg2, set *arg1 = arg3; return old value
*/
extern uint8_t atomic_cas_8(volatile uint8_t *, uint8_t, uint8_t);
extern uchar_t atomic_cas_uchar(volatile uchar_t *, uchar_t, uchar_t);
extern uint16_t atomic_cas_16(volatile uint16_t *, uint16_t, uint16_t);
extern ushort_t atomic_cas_ushort(volatile ushort_t *, ushort_t, ushort_t);
extern uint32_t atomic_cas_32(volatile uint32_t *, uint32_t, uint32_t);
extern uint_t atomic_cas_uint(volatile uint_t *, uint_t, uint_t);
extern void *atomic_cas_ptr(volatile void *, void *, void *);
extern ulong_t atomic_cas_ulong(volatile ulong_t *, ulong_t, ulong_t);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern uint64_t atomic_cas_64(volatile uint64_t *, uint64_t, uint64_t);
#endif
/*
* Swap target and return old value
*/
extern uint8_t atomic_swap_8(volatile uint8_t *, uint8_t);
extern uchar_t atomic_swap_uchar(volatile uchar_t *, uchar_t);
extern uint16_t atomic_swap_16(volatile uint16_t *, uint16_t);
extern ushort_t atomic_swap_ushort(volatile ushort_t *, ushort_t);
extern uint32_t atomic_swap_32(volatile uint32_t *, uint32_t);
extern uint_t atomic_swap_uint(volatile uint_t *, uint_t);
extern void *atomic_swap_ptr(volatile void *, void *);
extern ulong_t atomic_swap_ulong(volatile ulong_t *, ulong_t);
#if defined(_KERNEL) || defined(_INT64_TYPE)
extern uint64_t atomic_swap_64(volatile uint64_t *, uint64_t);
#endif
/*
* Perform an exclusive atomic bit set/clear on a target.
* Returns 0 if bit was successfully set/cleared, or -1
* if the bit was already set/cleared.
*/
extern int atomic_set_long_excl(volatile ulong_t *, uint_t);
extern int atomic_clear_long_excl(volatile ulong_t *, uint_t);
/*
* Generic memory barrier used during lock entry, placed after the
* memory operation that acquires the lock to guarantee that the lock
* protects its data. No stores from after the memory barrier will
* reach visibility, and no loads from after the barrier will be
* resolved, before the lock acquisition reaches global visibility.
*/
extern void membar_enter(void);
/*
* Generic memory barrier used during lock exit, placed before the
* memory operation that releases the lock to guarantee that the lock
* protects its data. All loads and stores issued before the barrier
* will be resolved before the subsequent lock update reaches visibility.
*/
extern void membar_exit(void);
/*
* Arrange that all stores issued before this point in the code reach
* global visibility before any stores that follow; useful in producer
* modules that update a data item, then set a flag that it is available.
* The memory barrier guarantees that the available flag is not visible
* earlier than the updated data, i.e. it imposes store ordering.
*/
extern void membar_producer(void);
/*
* Arrange that all loads issued before this point in the code are
* completed before any subsequent loads; useful in consumer modules
* that check to see if data is available and read the data.
* The memory barrier guarantees that the data is not sampled until
* after the available flag has been seen, i.e. it imposes load ordering.
*/
extern void membar_consumer(void);
#endif
#if !defined(_KERNEL) && !defined(__STDC__)
extern void atomic_inc_8();
extern void atomic_inc_uchar();
extern void atomic_inc_16();
extern void atomic_inc_ushort();
extern void atomic_inc_32();
extern void atomic_inc_uint();
extern void atomic_inc_ulong();
#if defined(_INT64_TYPE)
extern void atomic_inc_64();
#endif /* defined(_INT64_TYPE) */
extern void atomic_dec_8();
extern void atomic_dec_uchar();
extern void atomic_dec_16();
extern void atomic_dec_ushort();
extern void atomic_dec_32();
extern void atomic_dec_uint();
extern void atomic_dec_ulong();
#if defined(_INT64_TYPE)
extern void atomic_dec_64();
#endif /* defined(_INT64_TYPE) */
extern void atomic_add_8();
extern void atomic_add_char();
extern void atomic_add_16();
extern void atomic_add_short();
extern void atomic_add_32();
extern void atomic_add_int();
extern void atomic_add_ptr();
extern void atomic_add_long();
#if defined(_INT64_TYPE)
extern void atomic_add_64();
#endif /* defined(_INT64_TYPE) */
extern void atomic_or_8();
extern void atomic_or_uchar();
extern void atomic_or_16();
extern void atomic_or_ushort();
extern void atomic_or_32();
extern void atomic_or_uint();
extern void atomic_or_ulong();
#if defined(_INT64_TYPE)
extern void atomic_or_64();
#endif /* defined(_INT64_TYPE) */
extern void atomic_and_8();
extern void atomic_and_uchar();
extern void atomic_and_16();
extern void atomic_and_ushort();
extern void atomic_and_32();
extern void atomic_and_uint();
extern void atomic_and_ulong();
#if defined(_INT64_TYPE)
extern void atomic_and_64();
#endif /* defined(_INT64_TYPE) */
extern uint8_t atomic_inc_8_nv();
extern uchar_t atomic_inc_uchar_nv();
extern uint16_t atomic_inc_16_nv();
extern ushort_t atomic_inc_ushort_nv();
extern uint32_t atomic_inc_32_nv();
extern uint_t atomic_inc_uint_nv();
extern ulong_t atomic_inc_ulong_nv();
#if defined(_INT64_TYPE)
extern uint64_t atomic_inc_64_nv();
#endif /* defined(_INT64_TYPE) */
extern uint8_t atomic_dec_8_nv();
extern uchar_t atomic_dec_uchar_nv();
extern uint16_t atomic_dec_16_nv();
extern ushort_t atomic_dec_ushort_nv();
extern uint32_t atomic_dec_32_nv();
extern uint_t atomic_dec_uint_nv();
extern ulong_t atomic_dec_ulong_nv();
#if defined(_INT64_TYPE)
extern uint64_t atomic_dec_64_nv();
#endif /* defined(_INT64_TYPE) */
extern uint8_t atomic_add_8_nv();
extern uchar_t atomic_add_char_nv();
extern uint16_t atomic_add_16_nv();
extern ushort_t atomic_add_short_nv();
extern uint32_t atomic_add_32_nv();
extern uint_t atomic_add_int_nv();
extern void *atomic_add_ptr_nv();
extern ulong_t atomic_add_long_nv();
#if defined(_INT64_TYPE)
extern uint64_t atomic_add_64_nv();
#endif /* defined(_INT64_TYPE) */
extern uint8_t atomic_or_8_nv();
extern uchar_t atomic_or_uchar_nv();
extern uint16_t atomic_or_16_nv();
extern ushort_t atomic_or_ushort_nv();
extern uint32_t atomic_or_32_nv();
extern uint_t atomic_or_uint_nv();
extern ulong_t atomic_or_ulong_nv();
#if defined(_INT64_TYPE)
extern uint64_t atomic_or_64_nv();
#endif /* defined(_INT64_TYPE) */
extern uint8_t atomic_and_8_nv();
extern uchar_t atomic_and_uchar_nv();
extern uint16_t atomic_and_16_nv();
extern ushort_t atomic_and_ushort_nv();
extern uint32_t atomic_and_32_nv();
extern uint_t atomic_and_uint_nv();
extern ulong_t atomic_and_ulong_nv();
#if defined(_INT64_TYPE)
extern uint64_t atomic_and_64_nv();
#endif /* defined(_INT64_TYPE) */
extern uint8_t atomic_cas_8();
extern uchar_t atomic_cas_uchar();
extern uint16_t atomic_cas_16();
extern ushort_t atomic_cas_ushort();
extern uint32_t atomic_cas_32();
extern uint_t atomic_cas_uint();
extern void *atomic_cas_ptr();
extern ulong_t atomic_cas_ulong();
#if defined(_INT64_TYPE)
extern uint64_t atomic_cas_64();
#endif /* defined(_INT64_TYPE) */
extern uint8_t atomic_swap_8();
extern uchar_t atomic_swap_uchar();
extern uint16_t atomic_swap_16();
extern ushort_t atomic_swap_ushort();
extern uint32_t atomic_swap_32();
extern uint_t atomic_swap_uint();
extern void *atomic_swap_ptr();
extern ulong_t atomic_swap_ulong();
#if defined(_INT64_TYPE)
extern uint64_t atomic_swap_64();
#endif /* defined(_INT64_TYPE) */
extern int atomic_set_long_excl();
extern int atomic_clear_long_excl();
extern void membar_enter();
extern void membar_exit();
extern void membar_producer();
extern void membar_consumer();
#endif
#if defined(_KERNEL)
#if defined(_LP64) || defined(_ILP32)
#define atomic_add_ip atomic_add_long
#define atomic_add_ip_nv atomic_add_long_nv
#define casip atomic_cas_ulong
#endif
#if defined(__sparc)
extern uint8_t ldstub(uint8_t *);
#endif
/*
* Legacy kernel interfaces; they will go away (eventually).
*/
extern uint8_t cas8(uint8_t *, uint8_t, uint8_t);
extern uint32_t cas32(uint32_t *, uint32_t, uint32_t);
extern uint64_t cas64(uint64_t *, uint64_t, uint64_t);
extern ulong_t caslong(ulong_t *, ulong_t, ulong_t);
extern void *casptr(void *, void *, void *);
extern void atomic_and_long(ulong_t *, ulong_t);
extern void atomic_or_long(ulong_t *, ulong_t);
#if defined(__sparc)
extern uint32_t swapl(uint32_t *, uint32_t);
#endif
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ATOMIC_H */
+48
View File
@@ -0,0 +1,48 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _DEVID_H
#define _DEVID_H

#include <stdlib.h>
#include <sys/types.h>	/* dev_t, needed by devid_nmlist_t below */

/*
 * Stub versions of the devid_* interfaces.
 *
 * Nothing here is actually implemented: devid_get() always fails with -1,
 * and every other entry point calls abort() if it is ever reached.
 */

typedef int ddi_devid_t;

/* One (device name, device number) pair. */
typedef struct devid_nmlist {
	char *devname;
	dev_t dev;
} devid_nmlist_t;

/* Not implemented; aborts the process if called. */
static inline int
devid_str_decode(char *devidstr, ddi_devid_t *retdevid, char **retminor_name)
{
	abort();
}

/* Not implemented; aborts the process if called. */
static inline int
devid_deviceid_to_nmlist(char *search_path, ddi_devid_t devid,
    char *minor_name, devid_nmlist_t **retlist)
{
	abort();
}

/* Not implemented; aborts the process if called. */
static inline void
devid_str_free(char *str)
{
	abort();
}

/* Not implemented; aborts the process if called. */
static inline void
devid_free(ddi_devid_t devid)
{
	abort();
}

/* Not implemented; aborts the process if called. */
static inline void
devid_free_nmlist(devid_nmlist_t *list)
{
	abort();
}

/*
 * Always returns -1 and leaves *retdevid untouched, so callers see the
 * lookup as having failed.
 */
static inline int
devid_get(int fd, ddi_devid_t *retdevid)
{
	return (-1);
}

/* Not implemented; aborts the process if called. */
static inline int
devid_get_minor_name(int fd, char **retminor_name)
{
	abort();
}

/* Not implemented; aborts the process if called. */
static inline char *
devid_str_encode(ddi_devid_t devid, char *minor_name)
{
	abort();
}

#endif /* _DEVID_H */
+36
View File
@@ -0,0 +1,36 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SOL_DIRENT_H
#define _SOL_DIRENT_H
/* Chain to the next <dirent.h> on the include path before adjusting it. */
#include_next <dirent.h>
/*
 * Drop any IFTODT macro that the header above defined.
 * NOTE(review): presumably this avoids a clash with another IFTODT
 * definition in the ported sources -- confirm against the users of this
 * header.
 */
#ifdef IFTODT
#undef IFTODT
#endif
#endif /* _SOL_DIRENT_H */
@@ -0,0 +1 @@
subdir-m += sys
@@ -0,0 +1,2 @@
DISTFILES = asm_linkage.h
@@ -0,0 +1,5 @@
/*
 * i386 wrapper: make sure __i386 is defined, then pull in the shared ia32
 * header, which tests __amd64 / __i386 to pick its word-size constants.
 */
#ifndef __i386
#define __i386
#endif
#include <ia32/sys/asm_linkage.h>
@@ -0,0 +1 @@
subdir-m += sys
@@ -0,0 +1,2 @@
DISTFILES = asm_linkage.h
@@ -0,0 +1,307 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _IA32_SYS_ASM_LINKAGE_H
#define _IA32_SYS_ASM_LINKAGE_H
#include <sys/stack.h>
#include <sys/trap.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _ASM /* The remainder of this file is only for assembly files */
/*
* make annoying differences in assembler syntax go away
*/
/*
* D16 and A16 are used to insert instructions prefixes; the
* macros help the assembler code be slightly more portable.
*/
#if !defined(__GNUC_AS__)
/*
* /usr/ccs/bin/as prefixes are parsed as separate instructions
*/
#define D16 data16;
#define A16 addr16;
/*
* (There are some weird constructs in constant expressions)
*/
#define _CONST(const) [const]
#define _BITNOT(const) -1!_CONST(const)
#define _MUL(a, b) _CONST(a \* b)
#else
/*
* Why not use the 'data16' and 'addr16' prefixes .. well, the
* assembler doesn't quite believe in real mode, and thus argues with
* us about what we're trying to do.
*/
#define D16 .byte 0x66;
#define A16 .byte 0x67;
#define _CONST(const) (const)
#define _BITNOT(const) ~_CONST(const)
#define _MUL(a, b) _CONST(a * b)
#endif
/*
* C pointers are different sizes between i386 and amd64.
* These constants can be used to compute offsets into pointer arrays.
*/
#if defined(__amd64)
#define CLONGSHIFT 3
#define CLONGSIZE 8
#define CLONGMASK 7
#elif defined(__i386)
#define CLONGSHIFT 2
#define CLONGSIZE 4
#define CLONGMASK 3
#endif
/*
* Since we know we're either ILP32 or LP64 ..
*/
#define CPTRSHIFT CLONGSHIFT
#define CPTRSIZE CLONGSIZE
#define CPTRMASK CLONGMASK
#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT)
#error "inconsistent shift constants"
#endif
#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1)
#error "inconsistent mask constants"
#endif
#define ASM_ENTRY_ALIGN 16
/*
* SSE register alignment and save areas
*/
#define XMM_SIZE 16
#define XMM_ALIGN 16
/*
 * SAVE_XMM_PROLOG(sreg, nreg) reserves stack space for nreg XMM-sized
 * slots and leaves the address of that area in sreg.
 * RSTOR_XMM_EPILOG(sreg, nreg) gives the same amount of stack back; it
 * must be passed the same nreg as the matching prolog.
 */
#if defined(__amd64)
/*
 * amd64: reserve exactly XMM_SIZE * nreg bytes and hand back the new %rsp
 * unchanged; no alignment adjustment is made here.
 */
#define SAVE_XMM_PROLOG(sreg, nreg) \
subq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp; \
movq %rsp, sreg
#define RSTOR_XMM_EPILOG(sreg, nreg) \
addq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp
#elif defined(__i386)
/*
 * i386: reserve XMM_ALIGN extra bytes, then set sreg to
 * (%esp + XMM_ALIGN) rounded down to an XMM_ALIGN boundary, so that sreg
 * is aligned and the nreg slots still fit inside the reserved area.
 */
#define SAVE_XMM_PROLOG(sreg, nreg) \
subl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; \
movl %esp, sreg; \
addl $XMM_ALIGN, sreg; \
andl $_BITNOT(XMM_ALIGN-1), sreg
/* Releases the slots plus the XMM_ALIGN bytes of padding added above. */
#define RSTOR_XMM_EPILOG(sreg, nreg) \
addl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp;
#endif /* __i386 */
/*
* Profiling needs definitions of MCOUNT and RTMCOUNT that are
* particular to the profiling type (GPROF or PROF).
*/
#ifdef GPROF
#define MCOUNT(x) \
pushl %ebp; \
movl %esp, %ebp; \
call _mcount; \
popl %ebp
#endif /* GPROF */
#ifdef PROF
#define MCOUNT(x) \
/* CSTYLED */ \
.lcomm .L_/**/x/**/1, 4, 4; \
pushl %ebp; \
movl %esp, %ebp; \
/* CSTYLED */ \
movl $.L_/**/x/**/1, %edx; \
call _mcount; \
popl %ebp
#endif /* PROF */
/*
* if we are not profiling, MCOUNT should be defined to nothing
*/
#if !defined(PROF) && !defined(GPROF)
#define MCOUNT(x)
#endif /* !defined(PROF) && !defined(GPROF) */
#define RTMCOUNT(x) MCOUNT(x)
/*
* Macro to define weak symbol aliases. These are similar to the ANSI-C
* #pragma weak name = _name
* except a compiler can determine type. The assembler must be told. Hence,
* the second parameter must be the type of the symbol (i.e.: function,...)
*/
#define ANSI_PRAGMA_WEAK(sym, stype) \
.weak sym; \
.type sym, @stype; \
/* CSTYLED */ \
sym = _/**/sym
/*
* Like ANSI_PRAGMA_WEAK(), but for unrelated names, as in:
* #pragma weak sym1 = sym2
*/
#define ANSI_PRAGMA_WEAK2(sym1, sym2, stype) \
.weak sym1; \
.type sym1, @stype; \
sym1 = sym2
/*
* ENTRY provides the standard procedure entry code and an easy way to
* insert the calls to mcount for profiling. ENTRY_NP is identical, but
* never calls mcount.
*/
#define ENTRY(x) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x; \
.type x, @function; \
x: MCOUNT(x)
#define ENTRY_NP(x) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x; \
.type x, @function; \
x:
#define RTENTRY(x) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x; \
.type x, @function; \
x: RTMCOUNT(x)
/*
* ENTRY2 is identical to ENTRY but provides two labels for the entry point.
*/
#define ENTRY2(x, y) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x, y; \
.type x, @function; \
.type y, @function; \
/* CSTYLED */ \
x: ; \
y: MCOUNT(x)
#define ENTRY_NP2(x, y) \
.text; \
.align ASM_ENTRY_ALIGN; \
.globl x, y; \
.type x, @function; \
.type y, @function; \
/* CSTYLED */ \
x: ; \
y:
/*
* ALTENTRY provides for additional entry points.
*/
#define ALTENTRY(x) \
.globl x; \
.type x, @function; \
x:
/*
* DGDEF, DGDEF2 and DGDEF3 provide global data declarations.
*
* DGDEF provides a word aligned word of storage.
*
* DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This
* implies this macro is best used for byte arrays.
*
* DGDEF3 allocates "sz" bytes of storage with "algn" alignment.
*/
#define DGDEF2(name, sz) \
.data; \
.globl name; \
.type name, @object; \
.size name, sz; \
name:
#define DGDEF3(name, sz, algn) \
.data; \
.align algn; \
.globl name; \
.type name, @object; \
.size name, sz; \
name:
#define DGDEF(name) DGDEF3(name, 4, 4)
/*
* SET_SIZE trails a function and sets the size for the ELF symbol table.
*/
#define SET_SIZE(x) \
.size x, [.-x]
/*
* NWORD provides native word value.
*/
#if defined(__amd64)
/*CSTYLED*/
#define NWORD quad
#elif defined(__i386)
#define NWORD long
#endif /* __i386 */
#endif /* _ASM */
#ifdef __cplusplus
}
#endif
#endif /* _IA32_SYS_ASM_LINKAGE_H */
View File
+35
View File
@@ -0,0 +1,35 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SOL_LIBGEN_H
#define _SOL_LIBGEN_H
/* Chain to the next <libgen.h> on the include path before adding to it. */
#include_next <libgen.h>
#include <sys/types.h>	/* mode_t */
/*
 * Prototype only; the implementation is not in this header.
 * NOTE(review): presumably creates path together with any missing parent
 * directories, like the Solaris mkdirp() -- confirm against the
 * implementation.
 */
extern int mkdirp(const char *path, mode_t mode);
#endif /* _SOL_LIBGEN_H */
+37
View File
@@ -0,0 +1,37 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _PRIV_H
#define _PRIV_H
#include <sys/types.h>
/* Couldn't find this definition in OpenGrok */
#define PRIV_SYS_CONFIG "sys_config"
/*
 * Compatibility stub: always returns B_TRUE, whatever privilege name is
 * passed in.  The argument is not examined.
 */
static inline boolean_t
priv_ineffect(const char *priv)
{
	(void) priv;
	return (B_TRUE);
}
#endif
@@ -0,0 +1,2 @@
DISTFILES = xdr.h
+118
View File
@@ -0,0 +1,118 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Portions of this source code were derived from Berkeley 4.3 BSD
* under license from the Regents of the University of California.
*/
#ifndef _SOL_RPC_XDR_H_
#define _SOL_RPC_XDR_H_
#include_next <rpc/xdr.h>
/*
* Strangely, my glibc version (2.3.6) doesn't have xdr_control(), so
* we have to hack it in here (source taken from OpenSolaris).
* By the way, it is assumed the xdrmem implementation is used.
*/
#define xdr_control(a,b,c) xdrmem_control(a,b,c)
/*
* These are XDR control operators
*/
#define XDR_GET_BYTES_AVAIL 1
struct xdr_bytesrec {
bool_t xc_is_last_record;
size_t xc_num_avail;
};
typedef struct xdr_bytesrec xdr_bytesrec;
/*
* These are the request arguments to XDR_CONTROL.
*
* XDR_PEEK - returns the contents of the next XDR unit on the XDR stream.
* XDR_SKIPBYTES - skips the next N bytes in the XDR stream.
* XDR_RDMAGET - for xdr implementation over RDMA, gets private flags from
* the XDR stream being moved over RDMA
* XDR_RDMASET - for xdr implementation over RDMA, sets private flags in
* the XDR stream moving over RDMA.
*/
#define XDR_PEEK 2
#define XDR_SKIPBYTES 3
#define XDR_RDMAGET 4
#define XDR_RDMASET 5
/*
 * xdrmem_control() -- backs the xdr_control() macro defined in this header.
 * It is only meaningful for memory (xdrmem) streams, where x_handy is used
 * as the number of bytes left and x_private as the current position.
 *
 * xdrs    - stream to query or advance.
 * request - XDR_GET_BYTES_AVAIL, XDR_PEEK or XDR_SKIPBYTES.
 * info    - request argument: an xdr_bytesrec * for XDR_GET_BYTES_AVAIL,
 *           otherwise an int32_t * (the result for XDR_PEEK, the byte
 *           count for XDR_SKIPBYTES).
 *
 * Returns TRUE on success.  Returns FALSE, leaving the stream untouched,
 * for any other request, for a peek with fewer than four bytes left, and
 * for a skip count that is negative or (after rounding up to a whole XDR
 * unit) larger than what is left.
 *
 * Declared inline so that files which include this header without calling
 * the function do not each carry an unused static copy.
 */
static inline bool_t
xdrmem_control(XDR *xdrs, int request, void *info)
{
	xdr_bytesrec *xptr;
	int32_t *int32p;
	const unsigned char *next;
	uint32_t len;

	switch (request) {
	case XDR_GET_BYTES_AVAIL:
		xptr = (xdr_bytesrec *)info;
		xptr->xc_is_last_record = TRUE;
		xptr->xc_num_avail = xdrs->x_handy;
		return (TRUE);

	case XDR_PEEK:
		/*
		 * Return the next 4 byte unit in the XDR stream.
		 */
		if (xdrs->x_handy < sizeof (int32_t))
			return (FALSE);
		int32p = (int32_t *)info;
		/*
		 * Assemble the big-endian word byte by byte.  This yields the
		 * same value as ntohl() on a loaded word, without requiring
		 * the buffer position to be int32_t-aligned.
		 */
		next = (const unsigned char *)xdrs->x_private;
		*int32p = (int32_t)(((uint32_t)next[0] << 24) |
		    ((uint32_t)next[1] << 16) |
		    ((uint32_t)next[2] << 8) |
		    (uint32_t)next[3]);
		return (TRUE);

	case XDR_SKIPBYTES:
		/*
		 * Skip the next N bytes in the XDR stream.
		 */
		int32p = (int32_t *)info;
		if (*int32p < 0)
			return (FALSE);
		/* Unsigned arithmetic, so rounding up cannot overflow. */
		len = RNDUP((uint32_t)*int32p);
		/*
		 * Validate before touching the stream.  glibc declares
		 * x_handy as unsigned, so the earlier
		 * "(x_handy -= len) < 0" test could never fire and a too
		 * large skip wrapped the counter instead of failing.
		 */
		if (len > xdrs->x_handy)
			return (FALSE);
		xdrs->x_handy -= len;
		xdrs->x_private += len;
		return (TRUE);

	default:
		break;
	}
	return (FALSE);
}
#endif

Some files were not shown because too many files have changed in this diff Show More