mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-14 12:10:26 +03:00
057e8eee35
There is at most a factor of 3x performance improvement to be had by using the Linux generic_fillattr() helper. However, to use it safely we need to ensure the values in a cached inode are kept rigerously up to date. Unfortunately, this isn't the case for the blksize, blocks, and atime fields. At the moment the authoritative values are still stored in the znode. This patch introduces an optimized zfs_getattr_fast() call. The idea is to use the up to date values from the inode and the blksize, block, and atime fields from the znode. At some latter date we should be able to strictly use the inode values and further improve performance. The remaining overhead in the zfs_getattr_fast() call can be attributed to having to take the znode mutex. This overhead is unavoidable until the inode is kept strictly up to date. The the careful reader will notice the we do not use the customary ZFS_ENTER()/ZFS_EXIT() macros. These macro's are designed to ensure the filesystem is not torn down in the middle of an operation. However, in this case the VFS is holding a reference on the active inode so we know this is impossible. =================== Performance Tests ======================== This test calls the fstat(2) system call 10,000,000 times on an open file description in a tight loop. The test results show the zfs stat(2) performance is now only 22% slower than ext4. This is a 2.5x improvement and there is a clear long term plan to get to parity with ext4. filesystem | test-1 test-2 test-3 | average | times-ext4 --------------+-------------------------+---------+----------- ext4 | 7.785s 7.899s 7.284s | 7.656s | 1.000x zfs-0.6.0-rc4 | 24.052s 22.531s 23.857s | 23.480s | 3.066x zfs-faststat | 9.224s 9.398s 9.485s | 9.369s | 1.223x The second test is to run 'du' of a copy of the /usr tree which contains 110514 files. The test is run multiple times both using both a cold cache (/proc/sys/vm/drop_caches) and a hot cache. As expected this change signigicantly improved the zfs hot cache performance and doesn't quite bring zfs to parity with ext4. A little surprisingly the zfs cold cache performance is better than ext4. This can probably be attributed to the zfs allocation policy of co-locating all the meta data on disk which minimizes seek times. By default the ext4 allocator will spread the data over the entire disk only co-locating each directory. filesystem | cold | hot --------------+---------+-------- ext4 | 13.318s | 1.040s zfs-0.6.0-rc4 | 4.982s | 1.762s zfs-faststat | 4.933s | 1.345s
365 lines
7.7 KiB
C
365 lines
7.7 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
|
|
*/
|
|
|
|
|
|
#include <sys/zfs_vfsops.h>
|
|
#include <sys/zfs_vnops.h>
|
|
#include <sys/vfs.h>
|
|
#include <sys/zpl.h>
|
|
|
|
|
|
static struct dentry *
|
|
zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
|
|
{
|
|
cred_t *cr = CRED();
|
|
struct inode *ip;
|
|
int error;
|
|
|
|
crhold(cr);
|
|
error = -zfs_lookup(dir, dname(dentry), &ip, 0, cr, NULL, NULL);
|
|
ASSERT3S(error, <=, 0);
|
|
crfree(cr);
|
|
|
|
if (error) {
|
|
if (error == -ENOENT)
|
|
return d_splice_alias(NULL, dentry);
|
|
else
|
|
return ERR_PTR(error);
|
|
}
|
|
|
|
return d_splice_alias(ip, dentry);
|
|
}
|
|
|
|
static int
|
|
zpl_create(struct inode *dir, struct dentry *dentry, int mode,
|
|
struct nameidata *nd)
|
|
{
|
|
cred_t *cr = CRED();
|
|
struct inode *ip;
|
|
vattr_t *vap;
|
|
int error;
|
|
|
|
crhold(cr);
|
|
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
|
|
vap->va_mode = mode;
|
|
vap->va_mask = ATTR_MODE;
|
|
vap->va_uid = crgetfsuid(cr);
|
|
vap->va_gid = crgetfsgid(cr);
|
|
vap->va_dentry = dentry;
|
|
|
|
error = -zfs_create(dir, (char *)dentry->d_name.name,
|
|
vap, 0, mode, &ip, cr, 0, NULL);
|
|
kmem_free(vap, sizeof(vattr_t));
|
|
crfree(cr);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
zpl_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
|
|
{
|
|
cred_t *cr = CRED();
|
|
struct inode *ip;
|
|
vattr_t *vap;
|
|
int error;
|
|
|
|
/*
|
|
* We currently expect Linux to supply rdev=0 for all sockets
|
|
* and fifos, but we want to know if this behavior ever changes.
|
|
*/
|
|
if (S_ISSOCK(mode) || S_ISFIFO(mode))
|
|
ASSERT(rdev == 0);
|
|
|
|
crhold(cr);
|
|
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
|
|
vap->va_mode = mode;
|
|
vap->va_mask = ATTR_MODE;
|
|
vap->va_rdev = rdev;
|
|
vap->va_uid = crgetfsuid(cr);
|
|
vap->va_gid = crgetfsgid(cr);
|
|
vap->va_dentry = dentry;
|
|
|
|
error = -zfs_create(dir, (char *)dentry->d_name.name,
|
|
vap, 0, mode, &ip, cr, 0, NULL);
|
|
kmem_free(vap, sizeof(vattr_t));
|
|
crfree(cr);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (-error);
|
|
}
|
|
|
|
static int
|
|
zpl_unlink(struct inode *dir, struct dentry *dentry)
|
|
{
|
|
cred_t *cr = CRED();
|
|
int error;
|
|
|
|
crhold(cr);
|
|
error = -zfs_remove(dir, dname(dentry), cr);
|
|
crfree(cr);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
zpl_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
|
{
|
|
cred_t *cr = CRED();
|
|
vattr_t *vap;
|
|
struct inode *ip;
|
|
int error;
|
|
|
|
crhold(cr);
|
|
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
|
|
vap->va_mode = S_IFDIR | mode;
|
|
vap->va_mask = ATTR_MODE;
|
|
vap->va_uid = crgetfsuid(cr);
|
|
vap->va_gid = crgetfsgid(cr);
|
|
vap->va_dentry = dentry;
|
|
|
|
error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL);
|
|
kmem_free(vap, sizeof(vattr_t));
|
|
crfree(cr);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
zpl_rmdir(struct inode * dir, struct dentry *dentry)
|
|
{
|
|
cred_t *cr = CRED();
|
|
int error;
|
|
|
|
crhold(cr);
|
|
error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0);
|
|
crfree(cr);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
|
|
{
|
|
int error;
|
|
|
|
error = -zfs_getattr_fast(dentry->d_inode, stat);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
zpl_setattr(struct dentry *dentry, struct iattr *ia)
|
|
{
|
|
cred_t *cr = CRED();
|
|
vattr_t *vap;
|
|
int error;
|
|
|
|
error = inode_change_ok(dentry->d_inode, ia);
|
|
if (error)
|
|
return (error);
|
|
|
|
crhold(cr);
|
|
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
|
|
vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK;
|
|
vap->va_mode = ia->ia_mode;
|
|
vap->va_uid = ia->ia_uid;
|
|
vap->va_gid = ia->ia_gid;
|
|
vap->va_size = ia->ia_size;
|
|
vap->va_atime = ia->ia_atime;
|
|
vap->va_mtime = ia->ia_mtime;
|
|
vap->va_ctime = ia->ia_ctime;
|
|
|
|
error = -zfs_setattr(dentry->d_inode, vap, 0, cr);
|
|
|
|
kmem_free(vap, sizeof(vattr_t));
|
|
crfree(cr);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
zpl_rename(struct inode *sdip, struct dentry *sdentry,
|
|
struct inode *tdip, struct dentry *tdentry)
|
|
{
|
|
cred_t *cr = CRED();
|
|
int error;
|
|
|
|
crhold(cr);
|
|
error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0);
|
|
crfree(cr);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
|
|
{
|
|
cred_t *cr = CRED();
|
|
vattr_t *vap;
|
|
struct inode *ip;
|
|
int error;
|
|
|
|
crhold(cr);
|
|
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
|
|
vap->va_mode = S_IFLNK | S_IRWXUGO;
|
|
vap->va_mask = ATTR_MODE;
|
|
vap->va_uid = crgetfsuid(cr);
|
|
vap->va_gid = crgetfsgid(cr);
|
|
vap->va_dentry = dentry;
|
|
|
|
error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0);
|
|
kmem_free(vap, sizeof(vattr_t));
|
|
crfree(cr);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static void *
|
|
zpl_follow_link(struct dentry *dentry, struct nameidata *nd)
|
|
{
|
|
cred_t *cr = CRED();
|
|
struct inode *ip = dentry->d_inode;
|
|
struct iovec iov;
|
|
uio_t uio;
|
|
char *link;
|
|
int error;
|
|
|
|
crhold(cr);
|
|
|
|
iov.iov_len = MAXPATHLEN;
|
|
iov.iov_base = link = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
|
|
|
|
uio.uio_iov = &iov;
|
|
uio.uio_iovcnt = 1;
|
|
uio.uio_resid = (MAXPATHLEN - 1);
|
|
uio.uio_segflg = UIO_SYSSPACE;
|
|
|
|
error = -zfs_readlink(ip, &uio, cr);
|
|
if (error) {
|
|
kmem_free(link, MAXPATHLEN);
|
|
nd_set_link(nd, ERR_PTR(error));
|
|
} else {
|
|
nd_set_link(nd, link);
|
|
}
|
|
|
|
crfree(cr);
|
|
return (NULL);
|
|
}
|
|
|
|
static void
|
|
zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
|
|
{
|
|
char *link;
|
|
|
|
link = nd_get_link(nd);
|
|
if (!IS_ERR(link))
|
|
kmem_free(link, MAXPATHLEN);
|
|
}
|
|
|
|
static int
|
|
zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
|
|
{
|
|
cred_t *cr = CRED();
|
|
struct inode *ip = old_dentry->d_inode;
|
|
int error;
|
|
|
|
if (ip->i_nlink >= ZFS_LINK_MAX)
|
|
return -EMLINK;
|
|
|
|
crhold(cr);
|
|
ip->i_ctime = CURRENT_TIME_SEC;
|
|
igrab(ip); /* Use ihold() if available */
|
|
|
|
error = -zfs_link(dir, ip, dname(dentry), cr);
|
|
if (error) {
|
|
iput(ip);
|
|
goto out;
|
|
}
|
|
|
|
d_instantiate(dentry, ip);
|
|
out:
|
|
crfree(cr);
|
|
ASSERT3S(error, <=, 0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
const struct inode_operations zpl_inode_operations = {
|
|
.create = zpl_create,
|
|
.link = zpl_link,
|
|
.unlink = zpl_unlink,
|
|
.symlink = zpl_symlink,
|
|
.mkdir = zpl_mkdir,
|
|
.rmdir = zpl_rmdir,
|
|
.mknod = zpl_mknod,
|
|
.rename = zpl_rename,
|
|
.setattr = zpl_setattr,
|
|
.getattr = zpl_getattr,
|
|
.setxattr = generic_setxattr,
|
|
.getxattr = generic_getxattr,
|
|
.removexattr = generic_removexattr,
|
|
.listxattr = zpl_xattr_list,
|
|
};
|
|
|
|
const struct inode_operations zpl_dir_inode_operations = {
|
|
.create = zpl_create,
|
|
.lookup = zpl_lookup,
|
|
.link = zpl_link,
|
|
.unlink = zpl_unlink,
|
|
.symlink = zpl_symlink,
|
|
.mkdir = zpl_mkdir,
|
|
.rmdir = zpl_rmdir,
|
|
.mknod = zpl_mknod,
|
|
.rename = zpl_rename,
|
|
.setattr = zpl_setattr,
|
|
.getattr = zpl_getattr,
|
|
.setxattr = generic_setxattr,
|
|
.getxattr = generic_getxattr,
|
|
.removexattr = generic_removexattr,
|
|
.listxattr = zpl_xattr_list,
|
|
};
|
|
|
|
const struct inode_operations zpl_symlink_inode_operations = {
|
|
.readlink = generic_readlink,
|
|
.follow_link = zpl_follow_link,
|
|
.put_link = zpl_put_link,
|
|
};
|
|
|
|
const struct inode_operations zpl_special_inode_operations = {
|
|
.setattr = zpl_setattr,
|
|
.getattr = zpl_getattr,
|
|
.setxattr = generic_setxattr,
|
|
.getxattr = generic_getxattr,
|
|
.removexattr = generic_removexattr,
|
|
.listxattr = zpl_xattr_list,
|
|
};
|