mirror_zfs/module/zfs/zpl_inode.c
Brian Behlendorf 057e8eee35 Improve fstat(2) performance
There is at most a factor of 3x performance improvement to be
had by using the Linux generic_fillattr() helper.  However, to
use it safely we need to ensure the values in a cached inode
are kept rigerously up to date.  Unfortunately, this isn't
the case for the blksize, blocks, and atime fields.  At the
moment the authoritative values are still stored in the znode.

This patch introduces an optimized zfs_getattr_fast() call.
The idea is to use the up to date values from the inode and
the blksize, block, and atime fields from the znode.  At some
latter date we should be able to strictly use the inode values
and further improve performance.

The remaining overhead in the zfs_getattr_fast() call can be
attributed to having to take the znode mutex.  This overhead is
unavoidable until the inode is kept strictly up to date.  The
the careful reader will notice the we do not use the customary
ZFS_ENTER()/ZFS_EXIT() macros.  These macro's are designed to
ensure the filesystem is not torn down in the middle of an
operation.  However, in this case the VFS is holding a
reference on the active inode so we know this is impossible.

=================== Performance Tests ========================

This test calls the fstat(2) system call 10,000,000 times on
an open file description in a tight loop.  The test results
show the zfs stat(2) performance is now only 22% slower than
ext4.  This is a 2.5x improvement and there is a clear long
term plan to get to parity with ext4.

filesystem    | test-1  test-2  test-3  | average | times-ext4
--------------+-------------------------+---------+-----------
ext4          |  7.785s  7.899s  7.284s |  7.656s | 1.000x
zfs-0.6.0-rc4 | 24.052s 22.531s 23.857s | 23.480s | 3.066x
zfs-faststat  |  9.224s  9.398s  9.485s |  9.369s | 1.223x

The second test is to run 'du' of a copy of the /usr tree
which contains 110514 files.  The test is run multiple times
both using both a cold cache (/proc/sys/vm/drop_caches) and
a hot cache.  As expected this change signigicantly improved
the zfs hot cache performance and doesn't quite bring zfs to
parity with ext4.

A little surprisingly the zfs cold cache performance is better
than ext4.  This can probably be attributed to the zfs allocation
policy of co-locating all the meta data on disk which minimizes
seek times.  By default the ext4 allocator will spread the data
over the entire disk only co-locating each directory.

filesystem    | cold    | hot
--------------+---------+--------
ext4          | 13.318s | 1.040s
zfs-0.6.0-rc4 |  4.982s | 1.762s
zfs-faststat  |  4.933s | 1.345s
2011-07-11 09:11:22 -07:00

365 lines
7.7 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
*/
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/vfs.h>
#include <sys/zpl.h>
static struct dentry *
zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
cred_t *cr = CRED();
struct inode *ip;
int error;
crhold(cr);
error = -zfs_lookup(dir, dname(dentry), &ip, 0, cr, NULL, NULL);
ASSERT3S(error, <=, 0);
crfree(cr);
if (error) {
if (error == -ENOENT)
return d_splice_alias(NULL, dentry);
else
return ERR_PTR(error);
}
return d_splice_alias(ip, dentry);
}
static int
zpl_create(struct inode *dir, struct dentry *dentry, int mode,
struct nameidata *nd)
{
cred_t *cr = CRED();
struct inode *ip;
vattr_t *vap;
int error;
crhold(cr);
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
vap->va_mode = mode;
vap->va_mask = ATTR_MODE;
vap->va_uid = crgetfsuid(cr);
vap->va_gid = crgetfsgid(cr);
vap->va_dentry = dentry;
error = -zfs_create(dir, (char *)dentry->d_name.name,
vap, 0, mode, &ip, cr, 0, NULL);
kmem_free(vap, sizeof(vattr_t));
crfree(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
{
cred_t *cr = CRED();
struct inode *ip;
vattr_t *vap;
int error;
/*
* We currently expect Linux to supply rdev=0 for all sockets
* and fifos, but we want to know if this behavior ever changes.
*/
if (S_ISSOCK(mode) || S_ISFIFO(mode))
ASSERT(rdev == 0);
crhold(cr);
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
vap->va_mode = mode;
vap->va_mask = ATTR_MODE;
vap->va_rdev = rdev;
vap->va_uid = crgetfsuid(cr);
vap->va_gid = crgetfsgid(cr);
vap->va_dentry = dentry;
error = -zfs_create(dir, (char *)dentry->d_name.name,
vap, 0, mode, &ip, cr, 0, NULL);
kmem_free(vap, sizeof(vattr_t));
crfree(cr);
ASSERT3S(error, <=, 0);
return (-error);
}
static int
zpl_unlink(struct inode *dir, struct dentry *dentry)
{
cred_t *cr = CRED();
int error;
crhold(cr);
error = -zfs_remove(dir, dname(dentry), cr);
crfree(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
cred_t *cr = CRED();
vattr_t *vap;
struct inode *ip;
int error;
crhold(cr);
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
vap->va_mode = S_IFDIR | mode;
vap->va_mask = ATTR_MODE;
vap->va_uid = crgetfsuid(cr);
vap->va_gid = crgetfsgid(cr);
vap->va_dentry = dentry;
error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL);
kmem_free(vap, sizeof(vattr_t));
crfree(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_rmdir(struct inode * dir, struct dentry *dentry)
{
cred_t *cr = CRED();
int error;
crhold(cr);
error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0);
crfree(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
int error;
error = -zfs_getattr_fast(dentry->d_inode, stat);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_setattr(struct dentry *dentry, struct iattr *ia)
{
cred_t *cr = CRED();
vattr_t *vap;
int error;
error = inode_change_ok(dentry->d_inode, ia);
if (error)
return (error);
crhold(cr);
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK;
vap->va_mode = ia->ia_mode;
vap->va_uid = ia->ia_uid;
vap->va_gid = ia->ia_gid;
vap->va_size = ia->ia_size;
vap->va_atime = ia->ia_atime;
vap->va_mtime = ia->ia_mtime;
vap->va_ctime = ia->ia_ctime;
error = -zfs_setattr(dentry->d_inode, vap, 0, cr);
kmem_free(vap, sizeof(vattr_t));
crfree(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_rename(struct inode *sdip, struct dentry *sdentry,
struct inode *tdip, struct dentry *tdentry)
{
cred_t *cr = CRED();
int error;
crhold(cr);
error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0);
crfree(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static int
zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
{
cred_t *cr = CRED();
vattr_t *vap;
struct inode *ip;
int error;
crhold(cr);
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
vap->va_mode = S_IFLNK | S_IRWXUGO;
vap->va_mask = ATTR_MODE;
vap->va_uid = crgetfsuid(cr);
vap->va_gid = crgetfsgid(cr);
vap->va_dentry = dentry;
error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0);
kmem_free(vap, sizeof(vattr_t));
crfree(cr);
ASSERT3S(error, <=, 0);
return (error);
}
static void *
zpl_follow_link(struct dentry *dentry, struct nameidata *nd)
{
cred_t *cr = CRED();
struct inode *ip = dentry->d_inode;
struct iovec iov;
uio_t uio;
char *link;
int error;
crhold(cr);
iov.iov_len = MAXPATHLEN;
iov.iov_base = link = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_resid = (MAXPATHLEN - 1);
uio.uio_segflg = UIO_SYSSPACE;
error = -zfs_readlink(ip, &uio, cr);
if (error) {
kmem_free(link, MAXPATHLEN);
nd_set_link(nd, ERR_PTR(error));
} else {
nd_set_link(nd, link);
}
crfree(cr);
return (NULL);
}
static void
zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
{
char *link;
link = nd_get_link(nd);
if (!IS_ERR(link))
kmem_free(link, MAXPATHLEN);
}
static int
zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
cred_t *cr = CRED();
struct inode *ip = old_dentry->d_inode;
int error;
if (ip->i_nlink >= ZFS_LINK_MAX)
return -EMLINK;
crhold(cr);
ip->i_ctime = CURRENT_TIME_SEC;
igrab(ip); /* Use ihold() if available */
error = -zfs_link(dir, ip, dname(dentry), cr);
if (error) {
iput(ip);
goto out;
}
d_instantiate(dentry, ip);
out:
crfree(cr);
ASSERT3S(error, <=, 0);
return (error);
}
const struct inode_operations zpl_inode_operations = {
.create = zpl_create,
.link = zpl_link,
.unlink = zpl_unlink,
.symlink = zpl_symlink,
.mkdir = zpl_mkdir,
.rmdir = zpl_rmdir,
.mknod = zpl_mknod,
.rename = zpl_rename,
.setattr = zpl_setattr,
.getattr = zpl_getattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = zpl_xattr_list,
};
const struct inode_operations zpl_dir_inode_operations = {
.create = zpl_create,
.lookup = zpl_lookup,
.link = zpl_link,
.unlink = zpl_unlink,
.symlink = zpl_symlink,
.mkdir = zpl_mkdir,
.rmdir = zpl_rmdir,
.mknod = zpl_mknod,
.rename = zpl_rename,
.setattr = zpl_setattr,
.getattr = zpl_getattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = zpl_xattr_list,
};
const struct inode_operations zpl_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = zpl_follow_link,
.put_link = zpl_put_link,
};
const struct inode_operations zpl_special_inode_operations = {
.setattr = zpl_setattr,
.getattr = zpl_getattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = zpl_xattr_list,
};