mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
OpenZFS restructuring - move platform specific sources
Move platform specific Linux source under module/os/linux/
and update the build system accordingly. Additional code
restructuring will follow to make the common code fully
portable.
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Reviewed-by: Igor Kozhukhov <igor@dilos.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Matthew Macy <mmacy@FreeBSD.org>
Closes #9206
This commit is contained in:
committed by
Brian Behlendorf
parent
870e7a52c1
commit
bced7e3aaa
@@ -0,0 +1,34 @@
|
||||
#
# Linux-specific ZFS sources. This fragment is included from
# module/zfs/Makefile.in.
#

# The sparc64 architecture headers trigger unused-value warnings; silence them.
ifeq ($(target_cpu),sparc64)
ccflags-y += -Wno-unused-value
endif

ccflags-y += -I@abs_top_srcdir@/module/os/linux/zfs

# Objects are listed relative to module/zfs, which includes this fragment.
$(MODULE)-objs += \
	../os/linux/zfs/abd.o \
	../os/linux/zfs/policy.o \
	../os/linux/zfs/qat.o \
	../os/linux/zfs/qat_compress.o \
	../os/linux/zfs/qat_crypt.o \
	../os/linux/zfs/spa_stats.o \
	../os/linux/zfs/vdev_disk.o \
	../os/linux/zfs/vdev_file.o \
	../os/linux/zfs/zfs_acl.o \
	../os/linux/zfs/zfs_ctldir.o \
	../os/linux/zfs/zfs_debug.o \
	../os/linux/zfs/zfs_dir.o \
	../os/linux/zfs/zfs_sysfs.o \
	../os/linux/zfs/zfs_vfsops.o \
	../os/linux/zfs/zfs_vnops.o \
	../os/linux/zfs/zfs_znode.o \
	../os/linux/zfs/zio_crypt.o \
	../os/linux/zfs/zpl_ctldir.o \
	../os/linux/zfs/zpl_export.o \
	../os/linux/zfs/zpl_file.o \
	../os/linux/zfs/zpl_inode.o \
	../os/linux/zfs/zpl_super.o \
	../os/linux/zfs/zpl_xattr.o
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,355 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2013, Joyent, Inc. All rights reserved.
|
||||
* Copyright (C) 2016 Lawrence Livermore National Security, LLC.
|
||||
*
|
||||
* For Linux the vast majority of this enforcement is already handled via
|
||||
* the standard Linux VFS permission checks. However certain administrative
|
||||
* commands which bypass the standard mechanisms may need to make use of
|
||||
* this functionality.
|
||||
*/
|
||||
|
||||
#include <sys/policy.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/vfs_compat.h>
|
||||
|
||||
/*
|
||||
* The passed credentials cannot be directly verified because Linux only
|
||||
* provides and interface to check the *current* process credentials. In
|
||||
* order to handle this the capable() test is only run when the passed
|
||||
* credentials match the current process credentials or the kcred. In
|
||||
* all other cases this function must fail and return the passed err.
|
||||
*/
|
||||
static int
|
||||
priv_policy_ns(const cred_t *cr, int capability, boolean_t all, int err,
|
||||
struct user_namespace *ns)
|
||||
{
|
||||
ASSERT3S(all, ==, B_FALSE);
|
||||
|
||||
if (cr != CRED() && (cr != kcred))
|
||||
return (err);
|
||||
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_NS_CAPABLE)
|
||||
if (!(ns ? ns_capable(ns, capability) : capable(capability)))
|
||||
#else
|
||||
if (!capable(capability))
|
||||
#endif
|
||||
return (err);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
priv_policy(const cred_t *cr, int capability, boolean_t all, int err)
|
||||
{
|
||||
return (priv_policy_ns(cr, capability, all, err, NULL));
|
||||
}
|
||||
|
||||
static int
|
||||
priv_policy_user(const cred_t *cr, int capability, boolean_t all, int err)
|
||||
{
|
||||
/*
|
||||
* All priv_policy_user checks are preceded by kuid/kgid_has_mapping()
|
||||
* checks. If we cannot do them, we shouldn't be using ns_capable()
|
||||
* since we don't know whether the affected files are valid in our
|
||||
* namespace. Note that kuid_has_mapping() came after cred->user_ns, so
|
||||
* we shouldn't need to re-check for HAVE_CRED_USER_NS
|
||||
*/
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
|
||||
return (priv_policy_ns(cr, capability, all, err, cr->user_ns));
|
||||
#else
|
||||
return (priv_policy_ns(cr, capability, all, err, NULL));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks for operations that are either client-only or are used by
|
||||
* both clients and servers.
|
||||
*/
|
||||
int
|
||||
secpolicy_nfs(const cred_t *cr)
|
||||
{
|
||||
return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EPERM));
|
||||
}
|
||||
|
||||
/*
|
||||
* Catch all system configuration.
|
||||
*/
|
||||
int
|
||||
secpolicy_sys_config(const cred_t *cr, boolean_t checkonly)
|
||||
{
|
||||
return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EPERM));
|
||||
}
|
||||
|
||||
/*
|
||||
* Like secpolicy_vnode_access() but we get the actual wanted mode and the
|
||||
* current mode of the file, not the missing bits.
|
||||
*
|
||||
* Enforced in the Linux VFS.
|
||||
*/
|
||||
int
|
||||
secpolicy_vnode_access2(const cred_t *cr, struct inode *ip, uid_t owner,
|
||||
mode_t curmode, mode_t wantmode)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a special routine for ZFS; it is used to determine whether
|
||||
* any of the privileges in effect allow any form of access to the
|
||||
* file. There's no reason to audit this or any reason to record
|
||||
* this. More work is needed to do the "KPLD" stuff.
|
||||
*/
|
||||
int
|
||||
secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner)
|
||||
{
|
||||
if (crgetfsuid(cr) == owner)
|
||||
return (0);
|
||||
|
||||
if (zpl_inode_owner_or_capable(ip))
|
||||
return (0);
|
||||
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
|
||||
if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
|
||||
return (EPERM);
|
||||
#endif
|
||||
|
||||
if (priv_policy_user(cr, CAP_DAC_OVERRIDE, B_FALSE, EPERM) == 0)
|
||||
return (0);
|
||||
|
||||
if (priv_policy_user(cr, CAP_DAC_READ_SEARCH, B_FALSE, EPERM) == 0)
|
||||
return (0);
|
||||
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine if subject can chown owner of a file.
|
||||
*/
|
||||
int
|
||||
secpolicy_vnode_chown(const cred_t *cr, uid_t owner)
|
||||
{
|
||||
if (crgetfsuid(cr) == owner)
|
||||
return (0);
|
||||
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
|
||||
if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
|
||||
return (EPERM);
|
||||
#endif
|
||||
|
||||
return (priv_policy_user(cr, CAP_FOWNER, B_FALSE, EPERM));
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine if subject can change group ownership of a file.
|
||||
*/
|
||||
int
|
||||
secpolicy_vnode_create_gid(const cred_t *cr)
|
||||
{
|
||||
return (priv_policy(cr, CAP_SETGID, B_FALSE, EPERM));
|
||||
}
|
||||
|
||||
/*
|
||||
* Policy determines whether we can remove an entry from a directory,
|
||||
* regardless of permission bits.
|
||||
*/
|
||||
int
|
||||
secpolicy_vnode_remove(const cred_t *cr)
|
||||
{
|
||||
return (priv_policy(cr, CAP_FOWNER, B_FALSE, EPERM));
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine that subject can modify the mode of a file. allzone privilege
|
||||
* needed when modifying root owned object.
|
||||
*/
|
||||
int
|
||||
secpolicy_vnode_setdac(const cred_t *cr, uid_t owner)
|
||||
{
|
||||
if (crgetfsuid(cr) == owner)
|
||||
return (0);
|
||||
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
|
||||
if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
|
||||
return (EPERM);
|
||||
#endif
|
||||
|
||||
return (priv_policy_user(cr, CAP_FOWNER, B_FALSE, EPERM));
|
||||
}
|
||||
|
||||
/*
|
||||
* Are we allowed to retain the set-uid/set-gid bits when
|
||||
* changing ownership or when writing to a file?
|
||||
* "issuid" should be true when set-uid; only in that case
|
||||
* root ownership is checked (setgid is assumed).
|
||||
*
|
||||
* Enforced in the Linux VFS.
|
||||
*/
|
||||
int
|
||||
secpolicy_vnode_setid_retain(const cred_t *cr, boolean_t issuidroot)
|
||||
{
|
||||
return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM));
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine that subject can set the file setgid flag.
|
||||
*/
|
||||
int
|
||||
secpolicy_vnode_setids_setgids(const cred_t *cr, gid_t gid)
|
||||
{
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
|
||||
if (!kgid_has_mapping(cr->user_ns, SGID_TO_KGID(gid)))
|
||||
return (EPERM);
|
||||
#endif
|
||||
if (crgetfsgid(cr) != gid && !groupmember(gid, cr))
|
||||
return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine if the subject can inject faults in the ZFS fault injection
|
||||
* framework. Requires all privileges.
|
||||
*/
|
||||
int
|
||||
secpolicy_zinject(const cred_t *cr)
|
||||
{
|
||||
return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EACCES));
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine if the subject has permission to manipulate ZFS datasets
|
||||
* (not pools). Equivalent to the SYS_MOUNT privilege.
|
||||
*/
|
||||
int
|
||||
secpolicy_zfs(const cred_t *cr)
|
||||
{
|
||||
return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EACCES));
|
||||
}
|
||||
|
||||
void
|
||||
secpolicy_setid_clear(vattr_t *vap, cred_t *cr)
|
||||
{
|
||||
if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0 &&
|
||||
secpolicy_vnode_setid_retain(cr,
|
||||
(vap->va_mode & S_ISUID) != 0 &&
|
||||
(vap->va_mask & AT_UID) != 0 && vap->va_uid == 0) != 0) {
|
||||
vap->va_mask |= AT_MODE;
|
||||
vap->va_mode &= ~(S_ISUID|S_ISGID);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine that subject can set the file setid flags.
|
||||
*/
|
||||
static int
|
||||
secpolicy_vnode_setid_modify(const cred_t *cr, uid_t owner)
|
||||
{
|
||||
if (crgetfsuid(cr) == owner)
|
||||
return (0);
|
||||
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING)
|
||||
if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner)))
|
||||
return (EPERM);
|
||||
#endif
|
||||
|
||||
return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM));
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine that subject can make a file a "sticky".
|
||||
*
|
||||
* Enforced in the Linux VFS.
|
||||
*/
|
||||
static int
|
||||
secpolicy_vnode_stky_modify(const cred_t *cr)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap,
|
||||
const vattr_t *ovap, cred_t *cr)
|
||||
{
|
||||
int error;
|
||||
|
||||
if ((vap->va_mode & S_ISUID) != 0 &&
|
||||
(error = secpolicy_vnode_setid_modify(cr,
|
||||
ovap->va_uid)) != 0) {
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check privilege if attempting to set the
|
||||
* sticky bit on a non-directory.
|
||||
*/
|
||||
if (!S_ISDIR(ip->i_mode) && (vap->va_mode & S_ISVTX) != 0 &&
|
||||
secpolicy_vnode_stky_modify(cr) != 0) {
|
||||
vap->va_mode &= ~S_ISVTX;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for privilege if attempting to set the
|
||||
* group-id bit.
|
||||
*/
|
||||
if ((vap->va_mode & S_ISGID) != 0 &&
|
||||
secpolicy_vnode_setids_setgids(cr, ovap->va_gid) != 0) {
|
||||
vap->va_mode &= ~S_ISGID;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check privileges for setting xvattr attributes
|
||||
*/
|
||||
int
|
||||
secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, vtype_t vtype)
|
||||
{
|
||||
return (secpolicy_vnode_chown(cr, owner));
|
||||
}
|
||||
|
||||
/*
|
||||
* Check privileges for setattr attributes.
|
||||
*
|
||||
* Enforced in the Linux VFS.
|
||||
*/
|
||||
int
|
||||
secpolicy_vnode_setattr(cred_t *cr, struct inode *ip, struct vattr *vap,
|
||||
const struct vattr *ovap, int flags,
|
||||
int unlocked_access(void *, int, cred_t *), void *node)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check privileges for links.
|
||||
*
|
||||
* Enforced in the Linux VFS.
|
||||
*/
|
||||
int
|
||||
secpolicy_basic_link(const cred_t *cr)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
#if defined(_KERNEL) && defined(HAVE_QAT)
|
||||
#include <sys/zfs_context.h>
|
||||
#include "qat.h"
|
||||
|
||||
qat_stats_t qat_stats = {
|
||||
{ "comp_requests", KSTAT_DATA_UINT64 },
|
||||
{ "comp_total_in_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "comp_total_out_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "decomp_requests", KSTAT_DATA_UINT64 },
|
||||
{ "decomp_total_in_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "decomp_total_out_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "dc_fails", KSTAT_DATA_UINT64 },
|
||||
{ "encrypt_requests", KSTAT_DATA_UINT64 },
|
||||
{ "encrypt_total_in_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "encrypt_total_out_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "decrypt_requests", KSTAT_DATA_UINT64 },
|
||||
{ "decrypt_total_in_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "decrypt_total_out_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "crypt_fails", KSTAT_DATA_UINT64 },
|
||||
{ "cksum_requests", KSTAT_DATA_UINT64 },
|
||||
{ "cksum_total_in_bytes", KSTAT_DATA_UINT64 },
|
||||
{ "cksum_fails", KSTAT_DATA_UINT64 },
|
||||
};
|
||||
|
||||
static kstat_t *qat_ksp = NULL;
|
||||
|
||||
CpaStatus
|
||||
qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes)
|
||||
{
|
||||
*pp_mem_addr = kmalloc(size_bytes, GFP_KERNEL);
|
||||
if (*pp_mem_addr == NULL)
|
||||
return (CPA_STATUS_RESOURCE);
|
||||
return (CPA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
void
|
||||
qat_mem_free_contig(void **pp_mem_addr)
|
||||
{
|
||||
if (*pp_mem_addr != NULL) {
|
||||
kfree(*pp_mem_addr);
|
||||
*pp_mem_addr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
qat_init(void)
|
||||
{
|
||||
qat_ksp = kstat_create("zfs", 0, "qat", "misc",
|
||||
KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t),
|
||||
KSTAT_FLAG_VIRTUAL);
|
||||
if (qat_ksp != NULL) {
|
||||
qat_ksp->ks_data = &qat_stats;
|
||||
kstat_install(qat_ksp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Just set the disable flag when qat init failed, qat can be
|
||||
* turned on again in post-process after zfs module is loaded, e.g.:
|
||||
* echo 0 > /sys/module/zfs/parameters/zfs_qat_compress_disable
|
||||
*/
|
||||
if (qat_dc_init() != 0)
|
||||
zfs_qat_compress_disable = 1;
|
||||
|
||||
if (qat_cy_init() != 0) {
|
||||
zfs_qat_checksum_disable = 1;
|
||||
zfs_qat_encrypt_disable = 1;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
qat_fini(void)
|
||||
{
|
||||
if (qat_ksp != NULL) {
|
||||
kstat_delete(qat_ksp);
|
||||
qat_ksp = NULL;
|
||||
}
|
||||
|
||||
qat_cy_fini();
|
||||
qat_dc_fini();
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,574 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
#if defined(_KERNEL) && defined(HAVE_QAT)
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/completion.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/byteorder.h>
|
||||
#include <sys/zio.h>
|
||||
#include "qat.h"
|
||||
|
||||
/*
|
||||
* Max instances in a QAT device, each instance is a channel to submit
|
||||
* jobs to QAT hardware, this is only for pre-allocating instance and
|
||||
* session arrays; the actual number of instances are defined in the
|
||||
* QAT driver's configuration file.
|
||||
*/
|
||||
#define QAT_DC_MAX_INSTANCES 48
|
||||
|
||||
/*
|
||||
* ZLIB head and foot size
|
||||
*/
|
||||
#define ZLIB_HEAD_SZ 2
|
||||
#define ZLIB_FOOT_SZ 4
|
||||
|
||||
static CpaInstanceHandle dc_inst_handles[QAT_DC_MAX_INSTANCES];
|
||||
static CpaDcSessionHandle session_handles[QAT_DC_MAX_INSTANCES];
|
||||
static CpaBufferList **buffer_array[QAT_DC_MAX_INSTANCES];
|
||||
static Cpa16U num_inst = 0;
|
||||
static Cpa32U inst_num = 0;
|
||||
static boolean_t qat_dc_init_done = B_FALSE;
|
||||
int zfs_qat_compress_disable = 0;
|
||||
|
||||
boolean_t
|
||||
qat_dc_use_accel(size_t s_len)
|
||||
{
|
||||
return (!zfs_qat_compress_disable &&
|
||||
qat_dc_init_done &&
|
||||
s_len >= QAT_MIN_BUF_SIZE &&
|
||||
s_len <= QAT_MAX_BUF_SIZE);
|
||||
}
|
||||
|
||||
static void
|
||||
qat_dc_callback(void *p_callback, CpaStatus status)
|
||||
{
|
||||
if (p_callback != NULL)
|
||||
complete((struct completion *)p_callback);
|
||||
}
|
||||
|
||||
static void
|
||||
qat_dc_clean(void)
|
||||
{
|
||||
Cpa16U buff_num = 0;
|
||||
Cpa16U num_inter_buff_lists = 0;
|
||||
|
||||
for (Cpa16U i = 0; i < num_inst; i++) {
|
||||
cpaDcStopInstance(dc_inst_handles[i]);
|
||||
QAT_PHYS_CONTIG_FREE(session_handles[i]);
|
||||
/* free intermediate buffers */
|
||||
if (buffer_array[i] != NULL) {
|
||||
cpaDcGetNumIntermediateBuffers(
|
||||
dc_inst_handles[i], &num_inter_buff_lists);
|
||||
for (buff_num = 0; buff_num < num_inter_buff_lists;
|
||||
buff_num++) {
|
||||
CpaBufferList *buffer_inter =
|
||||
buffer_array[i][buff_num];
|
||||
if (buffer_inter->pBuffers) {
|
||||
QAT_PHYS_CONTIG_FREE(
|
||||
buffer_inter->pBuffers->pData);
|
||||
QAT_PHYS_CONTIG_FREE(
|
||||
buffer_inter->pBuffers);
|
||||
}
|
||||
QAT_PHYS_CONTIG_FREE(
|
||||
buffer_inter->pPrivateMetaData);
|
||||
QAT_PHYS_CONTIG_FREE(buffer_inter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
num_inst = 0;
|
||||
qat_dc_init_done = B_FALSE;
|
||||
}
|
||||
|
||||
int
|
||||
qat_dc_init(void)
|
||||
{
|
||||
CpaStatus status = CPA_STATUS_SUCCESS;
|
||||
Cpa32U sess_size = 0;
|
||||
Cpa32U ctx_size = 0;
|
||||
Cpa16U num_inter_buff_lists = 0;
|
||||
Cpa16U buff_num = 0;
|
||||
Cpa32U buff_meta_size = 0;
|
||||
CpaDcSessionSetupData sd = {0};
|
||||
|
||||
if (qat_dc_init_done)
|
||||
return (0);
|
||||
|
||||
status = cpaDcGetNumInstances(&num_inst);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
return (-1);
|
||||
|
||||
/* if the user has configured no QAT compression units just return */
|
||||
if (num_inst == 0)
|
||||
return (0);
|
||||
|
||||
if (num_inst > QAT_DC_MAX_INSTANCES)
|
||||
num_inst = QAT_DC_MAX_INSTANCES;
|
||||
|
||||
status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
return (-1);
|
||||
|
||||
for (Cpa16U i = 0; i < num_inst; i++) {
|
||||
cpaDcSetAddressTranslation(dc_inst_handles[i],
|
||||
(void*)virt_to_phys);
|
||||
|
||||
status = cpaDcBufferListGetMetaSize(dc_inst_handles[i],
|
||||
1, &buff_meta_size);
|
||||
|
||||
if (status == CPA_STATUS_SUCCESS)
|
||||
status = cpaDcGetNumIntermediateBuffers(
|
||||
dc_inst_handles[i], &num_inter_buff_lists);
|
||||
|
||||
if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0)
|
||||
status = QAT_PHYS_CONTIG_ALLOC(&buffer_array[i],
|
||||
num_inter_buff_lists *
|
||||
sizeof (CpaBufferList *));
|
||||
|
||||
for (buff_num = 0; buff_num < num_inter_buff_lists;
|
||||
buff_num++) {
|
||||
if (status == CPA_STATUS_SUCCESS)
|
||||
status = QAT_PHYS_CONTIG_ALLOC(
|
||||
&buffer_array[i][buff_num],
|
||||
sizeof (CpaBufferList));
|
||||
|
||||
if (status == CPA_STATUS_SUCCESS)
|
||||
status = QAT_PHYS_CONTIG_ALLOC(
|
||||
&buffer_array[i][buff_num]->
|
||||
pPrivateMetaData,
|
||||
buff_meta_size);
|
||||
|
||||
if (status == CPA_STATUS_SUCCESS)
|
||||
status = QAT_PHYS_CONTIG_ALLOC(
|
||||
&buffer_array[i][buff_num]->pBuffers,
|
||||
sizeof (CpaFlatBuffer));
|
||||
|
||||
if (status == CPA_STATUS_SUCCESS) {
|
||||
/*
|
||||
* implementation requires an intermediate
|
||||
* buffer approximately twice the size of
|
||||
* output buffer, which is 2x max buffer
|
||||
* size here.
|
||||
*/
|
||||
status = QAT_PHYS_CONTIG_ALLOC(
|
||||
&buffer_array[i][buff_num]->pBuffers->
|
||||
pData, 2 * QAT_MAX_BUF_SIZE);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
buffer_array[i][buff_num]->numBuffers = 1;
|
||||
buffer_array[i][buff_num]->pBuffers->
|
||||
dataLenInBytes = 2 * QAT_MAX_BUF_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
status = cpaDcStartInstance(dc_inst_handles[i],
|
||||
num_inter_buff_lists, buffer_array[i]);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
sd.compLevel = CPA_DC_L1;
|
||||
sd.compType = CPA_DC_DEFLATE;
|
||||
sd.huffType = CPA_DC_HT_FULL_DYNAMIC;
|
||||
sd.sessDirection = CPA_DC_DIR_COMBINED;
|
||||
sd.sessState = CPA_DC_STATELESS;
|
||||
sd.deflateWindowSize = 7;
|
||||
sd.checksum = CPA_DC_ADLER32;
|
||||
status = cpaDcGetSessionSize(dc_inst_handles[i],
|
||||
&sd, &sess_size, &ctx_size);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
QAT_PHYS_CONTIG_ALLOC(&session_handles[i], sess_size);
|
||||
if (session_handles[i] == NULL)
|
||||
goto fail;
|
||||
|
||||
status = cpaDcInitSession(dc_inst_handles[i],
|
||||
session_handles[i],
|
||||
&sd, NULL, qat_dc_callback);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
qat_dc_init_done = B_TRUE;
|
||||
return (0);
|
||||
fail:
|
||||
qat_dc_clean();
|
||||
return (-1);
|
||||
}
|
||||
|
||||
void
|
||||
qat_dc_fini(void)
|
||||
{
|
||||
if (!qat_dc_init_done)
|
||||
return;
|
||||
|
||||
qat_dc_clean();
|
||||
}
|
||||
|
||||
/*
|
||||
* The "add" parameter is an additional buffer which is passed
|
||||
* to QAT as a scratch buffer alongside the destination buffer
|
||||
* in case the "compressed" data ends up being larger than the
|
||||
* original source data. This is necessary to prevent QAT from
|
||||
* generating buffer overflow warnings for incompressible data.
|
||||
*/
|
||||
static int
|
||||
qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len,
|
||||
char *dst, int dst_len, char *add, int add_len, size_t *c_len)
|
||||
{
|
||||
CpaInstanceHandle dc_inst_handle;
|
||||
CpaDcSessionHandle session_handle;
|
||||
CpaBufferList *buf_list_src = NULL;
|
||||
CpaBufferList *buf_list_dst = NULL;
|
||||
CpaFlatBuffer *flat_buf_src = NULL;
|
||||
CpaFlatBuffer *flat_buf_dst = NULL;
|
||||
Cpa8U *buffer_meta_src = NULL;
|
||||
Cpa8U *buffer_meta_dst = NULL;
|
||||
Cpa32U buffer_meta_size = 0;
|
||||
CpaDcRqResults dc_results;
|
||||
CpaStatus status = CPA_STATUS_SUCCESS;
|
||||
Cpa32U hdr_sz = 0;
|
||||
Cpa32U compressed_sz;
|
||||
Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 2;
|
||||
Cpa32U num_dst_buf = (dst_len >> PAGE_SHIFT) + 2;
|
||||
Cpa32U num_add_buf = (add_len >> PAGE_SHIFT) + 2;
|
||||
Cpa32U bytes_left;
|
||||
Cpa32U dst_pages = 0;
|
||||
Cpa32U adler32 = 0;
|
||||
char *data;
|
||||
struct page *page;
|
||||
struct page **in_pages = NULL;
|
||||
struct page **out_pages = NULL;
|
||||
struct page **add_pages = NULL;
|
||||
Cpa32U page_off = 0;
|
||||
struct completion complete;
|
||||
Cpa32U page_num = 0;
|
||||
Cpa16U i;
|
||||
|
||||
/*
|
||||
* We increment num_src_buf and num_dst_buf by 2 to allow
|
||||
* us to handle non page-aligned buffer addresses and buffers
|
||||
* whose sizes are not divisible by PAGE_SIZE.
|
||||
*/
|
||||
Cpa32U src_buffer_list_mem_size = sizeof (CpaBufferList) +
|
||||
(num_src_buf * sizeof (CpaFlatBuffer));
|
||||
Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) +
|
||||
((num_dst_buf + num_add_buf) * sizeof (CpaFlatBuffer));
|
||||
|
||||
if (QAT_PHYS_CONTIG_ALLOC(&in_pages,
|
||||
num_src_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
if (QAT_PHYS_CONTIG_ALLOC(&out_pages,
|
||||
num_dst_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
if (QAT_PHYS_CONTIG_ALLOC(&add_pages,
|
||||
num_add_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst;
|
||||
dc_inst_handle = dc_inst_handles[i];
|
||||
session_handle = session_handles[i];
|
||||
|
||||
cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf,
|
||||
&buffer_meta_size);
|
||||
if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) !=
|
||||
CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf + num_add_buf,
|
||||
&buffer_meta_size);
|
||||
if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) !=
|
||||
CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
/* build source buffer list */
|
||||
if (QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) !=
|
||||
CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1);
|
||||
|
||||
buf_list_src->pBuffers = flat_buf_src; /* always point to first one */
|
||||
|
||||
/* build destination buffer list */
|
||||
if (QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) !=
|
||||
CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);
|
||||
|
||||
buf_list_dst->pBuffers = flat_buf_dst; /* always point to first one */
|
||||
|
||||
buf_list_src->numBuffers = 0;
|
||||
buf_list_src->pPrivateMetaData = buffer_meta_src;
|
||||
bytes_left = src_len;
|
||||
data = src;
|
||||
page_num = 0;
|
||||
while (bytes_left > 0) {
|
||||
page_off = ((long)data & ~PAGE_MASK);
|
||||
page = qat_mem_to_page(data);
|
||||
in_pages[page_num] = page;
|
||||
flat_buf_src->pData = kmap(page) + page_off;
|
||||
flat_buf_src->dataLenInBytes =
|
||||
min((long)PAGE_SIZE - page_off, (long)bytes_left);
|
||||
|
||||
bytes_left -= flat_buf_src->dataLenInBytes;
|
||||
data += flat_buf_src->dataLenInBytes;
|
||||
flat_buf_src++;
|
||||
buf_list_src->numBuffers++;
|
||||
page_num++;
|
||||
}
|
||||
|
||||
buf_list_dst->numBuffers = 0;
|
||||
buf_list_dst->pPrivateMetaData = buffer_meta_dst;
|
||||
bytes_left = dst_len;
|
||||
data = dst;
|
||||
page_num = 0;
|
||||
while (bytes_left > 0) {
|
||||
page_off = ((long)data & ~PAGE_MASK);
|
||||
page = qat_mem_to_page(data);
|
||||
flat_buf_dst->pData = kmap(page) + page_off;
|
||||
out_pages[page_num] = page;
|
||||
flat_buf_dst->dataLenInBytes =
|
||||
min((long)PAGE_SIZE - page_off, (long)bytes_left);
|
||||
|
||||
bytes_left -= flat_buf_dst->dataLenInBytes;
|
||||
data += flat_buf_dst->dataLenInBytes;
|
||||
flat_buf_dst++;
|
||||
buf_list_dst->numBuffers++;
|
||||
page_num++;
|
||||
dst_pages++;
|
||||
}
|
||||
|
||||
/* map additional scratch pages into the destination buffer list */
|
||||
bytes_left = add_len;
|
||||
data = add;
|
||||
page_num = 0;
|
||||
while (bytes_left > 0) {
|
||||
page_off = ((long)data & ~PAGE_MASK);
|
||||
page = qat_mem_to_page(data);
|
||||
flat_buf_dst->pData = kmap(page) + page_off;
|
||||
add_pages[page_num] = page;
|
||||
flat_buf_dst->dataLenInBytes =
|
||||
min((long)PAGE_SIZE - page_off, (long)bytes_left);
|
||||
|
||||
bytes_left -= flat_buf_dst->dataLenInBytes;
|
||||
data += flat_buf_dst->dataLenInBytes;
|
||||
flat_buf_dst++;
|
||||
buf_list_dst->numBuffers++;
|
||||
page_num++;
|
||||
}
|
||||
|
||||
init_completion(&complete);
|
||||
|
||||
if (dir == QAT_COMPRESS) {
|
||||
QAT_STAT_BUMP(comp_requests);
|
||||
QAT_STAT_INCR(comp_total_in_bytes, src_len);
|
||||
|
||||
cpaDcGenerateHeader(session_handle,
|
||||
buf_list_dst->pBuffers, &hdr_sz);
|
||||
buf_list_dst->pBuffers->pData += hdr_sz;
|
||||
buf_list_dst->pBuffers->dataLenInBytes -= hdr_sz;
|
||||
status = cpaDcCompressData(
|
||||
dc_inst_handle, session_handle,
|
||||
buf_list_src, buf_list_dst,
|
||||
&dc_results, CPA_DC_FLUSH_FINAL,
|
||||
&complete);
|
||||
if (status != CPA_STATUS_SUCCESS) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* we now wait until the completion of the operation. */
|
||||
if (!wait_for_completion_interruptible_timeout(&complete,
|
||||
QAT_TIMEOUT_MS)) {
|
||||
status = CPA_STATUS_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (dc_results.status != CPA_STATUS_SUCCESS) {
|
||||
status = CPA_STATUS_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
compressed_sz = dc_results.produced;
|
||||
if (compressed_sz + hdr_sz + ZLIB_FOOT_SZ > dst_len) {
|
||||
status = CPA_STATUS_INCOMPRESSIBLE;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);
|
||||
/* move to the last page */
|
||||
flat_buf_dst += (compressed_sz + hdr_sz) >> PAGE_SHIFT;
|
||||
|
||||
/* no space for gzip footer in the last page */
|
||||
if (((compressed_sz + hdr_sz) % PAGE_SIZE)
|
||||
+ ZLIB_FOOT_SZ > PAGE_SIZE) {
|
||||
status = CPA_STATUS_INCOMPRESSIBLE;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* jump to the end of the buffer and append footer */
|
||||
flat_buf_dst->pData =
|
||||
(char *)((unsigned long)flat_buf_dst->pData & PAGE_MASK)
|
||||
+ ((compressed_sz + hdr_sz) % PAGE_SIZE);
|
||||
flat_buf_dst->dataLenInBytes = ZLIB_FOOT_SZ;
|
||||
|
||||
dc_results.produced = 0;
|
||||
status = cpaDcGenerateFooter(session_handle,
|
||||
flat_buf_dst, &dc_results);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
*c_len = compressed_sz + dc_results.produced + hdr_sz;
|
||||
QAT_STAT_INCR(comp_total_out_bytes, *c_len);
|
||||
} else {
|
||||
ASSERT3U(dir, ==, QAT_DECOMPRESS);
|
||||
QAT_STAT_BUMP(decomp_requests);
|
||||
QAT_STAT_INCR(decomp_total_in_bytes, src_len);
|
||||
|
||||
buf_list_src->pBuffers->pData += ZLIB_HEAD_SZ;
|
||||
buf_list_src->pBuffers->dataLenInBytes -= ZLIB_HEAD_SZ;
|
||||
status = cpaDcDecompressData(dc_inst_handle, session_handle,
|
||||
buf_list_src, buf_list_dst, &dc_results, CPA_DC_FLUSH_FINAL,
|
||||
&complete);
|
||||
|
||||
if (CPA_STATUS_SUCCESS != status) {
|
||||
status = CPA_STATUS_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* we now wait until the completion of the operation. */
|
||||
if (!wait_for_completion_interruptible_timeout(&complete,
|
||||
QAT_TIMEOUT_MS)) {
|
||||
status = CPA_STATUS_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (dc_results.status != CPA_STATUS_SUCCESS) {
|
||||
status = CPA_STATUS_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* verify adler checksum */
|
||||
adler32 = *(Cpa32U *)(src + dc_results.consumed + ZLIB_HEAD_SZ);
|
||||
if (adler32 != BSWAP_32(dc_results.checksum)) {
|
||||
status = CPA_STATUS_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
*c_len = dc_results.produced;
|
||||
QAT_STAT_INCR(decomp_total_out_bytes, *c_len);
|
||||
}
|
||||
|
||||
fail:
|
||||
if (status != CPA_STATUS_SUCCESS && status != CPA_STATUS_INCOMPRESSIBLE)
|
||||
QAT_STAT_BUMP(dc_fails);
|
||||
|
||||
if (in_pages) {
|
||||
for (page_num = 0;
|
||||
page_num < buf_list_src->numBuffers;
|
||||
page_num++) {
|
||||
kunmap(in_pages[page_num]);
|
||||
}
|
||||
QAT_PHYS_CONTIG_FREE(in_pages);
|
||||
}
|
||||
|
||||
if (out_pages) {
|
||||
for (page_num = 0; page_num < dst_pages; page_num++) {
|
||||
kunmap(out_pages[page_num]);
|
||||
}
|
||||
QAT_PHYS_CONTIG_FREE(out_pages);
|
||||
}
|
||||
|
||||
if (add_pages) {
|
||||
for (page_num = 0;
|
||||
page_num < buf_list_dst->numBuffers - dst_pages;
|
||||
page_num++) {
|
||||
kunmap(add_pages[page_num]);
|
||||
}
|
||||
QAT_PHYS_CONTIG_FREE(add_pages);
|
||||
}
|
||||
|
||||
QAT_PHYS_CONTIG_FREE(buffer_meta_src);
|
||||
QAT_PHYS_CONTIG_FREE(buffer_meta_dst);
|
||||
QAT_PHYS_CONTIG_FREE(buf_list_src);
|
||||
QAT_PHYS_CONTIG_FREE(buf_list_dst);
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* Entry point for QAT accelerated compression / decompression.
|
||||
*/
|
||||
int
|
||||
qat_compress(qat_compress_dir_t dir, char *src, int src_len,
|
||||
char *dst, int dst_len, size_t *c_len)
|
||||
{
|
||||
int ret;
|
||||
size_t add_len = 0;
|
||||
void *add = NULL;
|
||||
|
||||
if (dir == QAT_COMPRESS) {
|
||||
add_len = dst_len;
|
||||
add = zio_data_buf_alloc(add_len);
|
||||
}
|
||||
|
||||
ret = qat_compress_impl(dir, src, src_len, dst,
|
||||
dst_len, add, add_len, c_len);
|
||||
|
||||
if (dir == QAT_COMPRESS)
|
||||
zio_data_buf_free(add, add_len);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
param_set_qat_compress(const char *val, zfs_kernel_param_t *kp)
|
||||
{
|
||||
int ret;
|
||||
int *pvalue = kp->arg;
|
||||
ret = param_set_int(val, kp);
|
||||
if (ret)
|
||||
return (ret);
|
||||
/*
|
||||
* zfs_qat_compress_disable = 0: enable qat compress
|
||||
* try to initialize qat instance if it has not been done
|
||||
*/
|
||||
if (*pvalue == 0 && !qat_dc_init_done) {
|
||||
ret = qat_dc_init();
|
||||
if (ret != 0) {
|
||||
zfs_qat_compress_disable = 1;
|
||||
return (ret);
|
||||
}
|
||||
}
|
||||
return (ret);
|
||||
}
|
||||
|
||||
module_param_call(zfs_qat_compress_disable, param_set_qat_compress,
|
||||
param_get_int, &zfs_qat_compress_disable, 0644);
|
||||
MODULE_PARM_DESC(zfs_qat_compress_disable, "Enable/Disable QAT compression");
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,631 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file represents the QAT implementation of checksums and encryption.
|
||||
* Internally, QAT shares the same cryptographic instances for both of these
|
||||
* operations, so the code has been combined here. QAT data compression uses
|
||||
* compression instances, so that code is separated into qat_compress.c
|
||||
*/
|
||||
|
||||
#if defined(_KERNEL) && defined(HAVE_QAT)
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/completion.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio_crypt.h>
|
||||
#include "lac/cpa_cy_im.h"
|
||||
#include "lac/cpa_cy_common.h"
|
||||
#include "qat.h"
|
||||
|
||||
/*
|
||||
* Max instances in a QAT device, each instance is a channel to submit
|
||||
* jobs to QAT hardware, this is only for pre-allocating instances
|
||||
* and session arrays; the actual number of instances are defined in
|
||||
* the QAT driver's configure file.
|
||||
*/
|
||||
#define QAT_CRYPT_MAX_INSTANCES 48
|
||||
|
||||
#define MAX_PAGE_NUM 1024
|
||||
|
||||
static Cpa32U inst_num = 0;
|
||||
static Cpa16U num_inst = 0;
|
||||
static CpaInstanceHandle cy_inst_handles[QAT_CRYPT_MAX_INSTANCES];
|
||||
static boolean_t qat_cy_init_done = B_FALSE;
|
||||
int zfs_qat_encrypt_disable = 0;
|
||||
int zfs_qat_checksum_disable = 0;
|
||||
|
||||
typedef struct cy_callback {
|
||||
CpaBoolean verify_result;
|
||||
struct completion complete;
|
||||
} cy_callback_t;
|
||||
|
||||
static void
|
||||
symcallback(void *p_callback, CpaStatus status, const CpaCySymOp operation,
|
||||
void *op_data, CpaBufferList *buf_list_dst, CpaBoolean verify)
|
||||
{
|
||||
cy_callback_t *cb = p_callback;
|
||||
|
||||
if (cb != NULL) {
|
||||
/* indicate that the function has been called */
|
||||
cb->verify_result = verify;
|
||||
complete(&cb->complete);
|
||||
}
|
||||
}
|
||||
|
||||
boolean_t
|
||||
qat_crypt_use_accel(size_t s_len)
|
||||
{
|
||||
return (!zfs_qat_encrypt_disable &&
|
||||
qat_cy_init_done &&
|
||||
s_len >= QAT_MIN_BUF_SIZE &&
|
||||
s_len <= QAT_MAX_BUF_SIZE);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
qat_checksum_use_accel(size_t s_len)
|
||||
{
|
||||
return (!zfs_qat_checksum_disable &&
|
||||
qat_cy_init_done &&
|
||||
s_len >= QAT_MIN_BUF_SIZE &&
|
||||
s_len <= QAT_MAX_BUF_SIZE);
|
||||
}
|
||||
|
||||
void
|
||||
qat_cy_clean(void)
|
||||
{
|
||||
for (Cpa16U i = 0; i < num_inst; i++)
|
||||
cpaCyStopInstance(cy_inst_handles[i]);
|
||||
|
||||
num_inst = 0;
|
||||
qat_cy_init_done = B_FALSE;
|
||||
}
|
||||
|
||||
int
|
||||
qat_cy_init(void)
|
||||
{
|
||||
CpaStatus status = CPA_STATUS_FAIL;
|
||||
|
||||
if (qat_cy_init_done)
|
||||
return (0);
|
||||
|
||||
status = cpaCyGetNumInstances(&num_inst);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
return (-1);
|
||||
|
||||
/* if the user has configured no QAT encryption units just return */
|
||||
if (num_inst == 0)
|
||||
return (0);
|
||||
|
||||
if (num_inst > QAT_CRYPT_MAX_INSTANCES)
|
||||
num_inst = QAT_CRYPT_MAX_INSTANCES;
|
||||
|
||||
status = cpaCyGetInstances(num_inst, &cy_inst_handles[0]);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
return (-1);
|
||||
|
||||
for (Cpa16U i = 0; i < num_inst; i++) {
|
||||
status = cpaCySetAddressTranslation(cy_inst_handles[i],
|
||||
(void *)virt_to_phys);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto error;
|
||||
|
||||
status = cpaCyStartInstance(cy_inst_handles[i]);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto error;
|
||||
}
|
||||
|
||||
qat_cy_init_done = B_TRUE;
|
||||
return (0);
|
||||
|
||||
error:
|
||||
qat_cy_clean();
|
||||
return (-1);
|
||||
}
|
||||
|
||||
void
|
||||
qat_cy_fini(void)
|
||||
{
|
||||
if (!qat_cy_init_done)
|
||||
return;
|
||||
|
||||
qat_cy_clean();
|
||||
}
|
||||
|
||||
static CpaStatus
|
||||
qat_init_crypt_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle,
|
||||
CpaCySymSessionCtx **cy_session_ctx, crypto_key_t *key,
|
||||
Cpa64U crypt, Cpa32U aad_len)
|
||||
{
|
||||
CpaStatus status = CPA_STATUS_SUCCESS;
|
||||
Cpa32U ctx_size;
|
||||
Cpa32U ciper_algorithm;
|
||||
Cpa32U hash_algorithm;
|
||||
CpaCySymSessionSetupData sd = { 0 };
|
||||
|
||||
if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_CCM) {
|
||||
return (CPA_STATUS_FAIL);
|
||||
} else {
|
||||
ciper_algorithm = CPA_CY_SYM_CIPHER_AES_GCM;
|
||||
hash_algorithm = CPA_CY_SYM_HASH_AES_GCM;
|
||||
}
|
||||
|
||||
sd.cipherSetupData.cipherAlgorithm = ciper_algorithm;
|
||||
sd.cipherSetupData.pCipherKey = key->ck_data;
|
||||
sd.cipherSetupData.cipherKeyLenInBytes = key->ck_length / 8;
|
||||
sd.hashSetupData.hashAlgorithm = hash_algorithm;
|
||||
sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH;
|
||||
sd.hashSetupData.digestResultLenInBytes = ZIO_DATA_MAC_LEN;
|
||||
sd.hashSetupData.authModeSetupData.aadLenInBytes = aad_len;
|
||||
sd.sessionPriority = CPA_CY_PRIORITY_NORMAL;
|
||||
sd.symOperation = CPA_CY_SYM_OP_ALGORITHM_CHAINING;
|
||||
sd.digestIsAppended = CPA_FALSE;
|
||||
sd.verifyDigest = CPA_FALSE;
|
||||
|
||||
if (dir == QAT_ENCRYPT) {
|
||||
sd.cipherSetupData.cipherDirection =
|
||||
CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT;
|
||||
sd.algChainOrder =
|
||||
CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER;
|
||||
} else {
|
||||
ASSERT3U(dir, ==, QAT_DECRYPT);
|
||||
sd.cipherSetupData.cipherDirection =
|
||||
CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT;
|
||||
sd.algChainOrder =
|
||||
CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH;
|
||||
}
|
||||
|
||||
status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
return (status);
|
||||
|
||||
status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
return (status);
|
||||
|
||||
status = cpaCySymInitSession(inst_handle, symcallback, &sd,
|
||||
*cy_session_ctx);
|
||||
if (status != CPA_STATUS_SUCCESS) {
|
||||
QAT_PHYS_CONTIG_FREE(*cy_session_ctx);
|
||||
return (status);
|
||||
}
|
||||
|
||||
return (CPA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
static CpaStatus
|
||||
qat_init_checksum_session_ctx(CpaInstanceHandle inst_handle,
|
||||
CpaCySymSessionCtx **cy_session_ctx, Cpa64U cksum)
|
||||
{
|
||||
CpaStatus status = CPA_STATUS_SUCCESS;
|
||||
Cpa32U ctx_size;
|
||||
Cpa32U hash_algorithm;
|
||||
CpaCySymSessionSetupData sd = { 0 };
|
||||
|
||||
/*
|
||||
* ZFS's SHA512 checksum is actually SHA512/256, which uses
|
||||
* a different IV from standard SHA512. QAT does not support
|
||||
* SHA512/256, so we can only support SHA256.
|
||||
*/
|
||||
if (cksum == ZIO_CHECKSUM_SHA256)
|
||||
hash_algorithm = CPA_CY_SYM_HASH_SHA256;
|
||||
else
|
||||
return (CPA_STATUS_FAIL);
|
||||
|
||||
sd.sessionPriority = CPA_CY_PRIORITY_NORMAL;
|
||||
sd.symOperation = CPA_CY_SYM_OP_HASH;
|
||||
sd.hashSetupData.hashAlgorithm = hash_algorithm;
|
||||
sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_PLAIN;
|
||||
sd.hashSetupData.digestResultLenInBytes = sizeof (zio_cksum_t);
|
||||
sd.digestIsAppended = CPA_FALSE;
|
||||
sd.verifyDigest = CPA_FALSE;
|
||||
|
||||
status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
return (status);
|
||||
|
||||
status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
return (status);
|
||||
|
||||
status = cpaCySymInitSession(inst_handle, symcallback, &sd,
|
||||
*cy_session_ctx);
|
||||
if (status != CPA_STATUS_SUCCESS) {
|
||||
QAT_PHYS_CONTIG_FREE(*cy_session_ctx);
|
||||
return (status);
|
||||
}
|
||||
|
||||
return (CPA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
static CpaStatus
|
||||
qat_init_cy_buffer_lists(CpaInstanceHandle inst_handle, uint32_t nr_bufs,
|
||||
CpaBufferList *src, CpaBufferList *dst)
|
||||
{
|
||||
CpaStatus status = CPA_STATUS_SUCCESS;
|
||||
Cpa32U meta_size = 0;
|
||||
|
||||
status = cpaCyBufferListGetMetaSize(inst_handle, nr_bufs, &meta_size);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
return (status);
|
||||
|
||||
status = QAT_PHYS_CONTIG_ALLOC(&src->pPrivateMetaData, meta_size);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto error;
|
||||
|
||||
if (src != dst) {
|
||||
status = QAT_PHYS_CONTIG_ALLOC(&dst->pPrivateMetaData,
|
||||
meta_size);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto error;
|
||||
}
|
||||
|
||||
return (CPA_STATUS_SUCCESS);
|
||||
|
||||
error:
|
||||
QAT_PHYS_CONTIG_FREE(src->pPrivateMetaData);
|
||||
if (src != dst)
|
||||
QAT_PHYS_CONTIG_FREE(dst->pPrivateMetaData);
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
int
|
||||
qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
|
||||
uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf,
|
||||
crypto_key_t *key, uint64_t crypt, uint32_t enc_len)
|
||||
{
|
||||
CpaStatus status = CPA_STATUS_SUCCESS;
|
||||
Cpa16U i;
|
||||
CpaInstanceHandle cy_inst_handle;
|
||||
Cpa16U nr_bufs = (enc_len >> PAGE_SHIFT) + 2;
|
||||
Cpa32U bytes_left = 0;
|
||||
Cpa8S *data = NULL;
|
||||
CpaCySymSessionCtx *cy_session_ctx = NULL;
|
||||
cy_callback_t cb;
|
||||
CpaCySymOpData op_data = { 0 };
|
||||
CpaBufferList src_buffer_list = { 0 };
|
||||
CpaBufferList dst_buffer_list = { 0 };
|
||||
CpaFlatBuffer *flat_src_buf_array = NULL;
|
||||
CpaFlatBuffer *flat_src_buf = NULL;
|
||||
CpaFlatBuffer *flat_dst_buf_array = NULL;
|
||||
CpaFlatBuffer *flat_dst_buf = NULL;
|
||||
struct page *in_pages[MAX_PAGE_NUM];
|
||||
struct page *out_pages[MAX_PAGE_NUM];
|
||||
Cpa32U in_page_num = 0;
|
||||
Cpa32U out_page_num = 0;
|
||||
Cpa32U in_page_off = 0;
|
||||
Cpa32U out_page_off = 0;
|
||||
|
||||
if (dir == QAT_ENCRYPT) {
|
||||
QAT_STAT_BUMP(encrypt_requests);
|
||||
QAT_STAT_INCR(encrypt_total_in_bytes, enc_len);
|
||||
} else {
|
||||
QAT_STAT_BUMP(decrypt_requests);
|
||||
QAT_STAT_INCR(decrypt_total_in_bytes, enc_len);
|
||||
}
|
||||
|
||||
i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst;
|
||||
cy_inst_handle = cy_inst_handles[i];
|
||||
|
||||
status = qat_init_crypt_session_ctx(dir, cy_inst_handle,
|
||||
&cy_session_ctx, key, crypt, aad_len);
|
||||
if (status != CPA_STATUS_SUCCESS) {
|
||||
/* don't count CCM as a failure since it's not supported */
|
||||
if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_GCM)
|
||||
QAT_STAT_BUMP(crypt_fails);
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* We increment nr_bufs by 2 to allow us to handle non
|
||||
* page-aligned buffer addresses and buffers whose sizes
|
||||
* are not divisible by PAGE_SIZE.
|
||||
*/
|
||||
status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs,
|
||||
&src_buffer_list, &dst_buffer_list);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array,
|
||||
nr_bufs * sizeof (CpaFlatBuffer));
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
status = QAT_PHYS_CONTIG_ALLOC(&flat_dst_buf_array,
|
||||
nr_bufs * sizeof (CpaFlatBuffer));
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
status = QAT_PHYS_CONTIG_ALLOC(&op_data.pDigestResult,
|
||||
ZIO_DATA_MAC_LEN);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
status = QAT_PHYS_CONTIG_ALLOC(&op_data.pIv,
|
||||
ZIO_DATA_IV_LEN);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
if (aad_len > 0) {
|
||||
status = QAT_PHYS_CONTIG_ALLOC(&op_data.pAdditionalAuthData,
|
||||
aad_len);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
bcopy(aad_buf, op_data.pAdditionalAuthData, aad_len);
|
||||
}
|
||||
|
||||
bytes_left = enc_len;
|
||||
data = src_buf;
|
||||
flat_src_buf = flat_src_buf_array;
|
||||
while (bytes_left > 0) {
|
||||
in_page_off = ((long)data & ~PAGE_MASK);
|
||||
in_pages[in_page_num] = qat_mem_to_page(data);
|
||||
flat_src_buf->pData = kmap(in_pages[in_page_num]) + in_page_off;
|
||||
flat_src_buf->dataLenInBytes =
|
||||
min((long)PAGE_SIZE - in_page_off, (long)bytes_left);
|
||||
data += flat_src_buf->dataLenInBytes;
|
||||
bytes_left -= flat_src_buf->dataLenInBytes;
|
||||
flat_src_buf++;
|
||||
in_page_num++;
|
||||
}
|
||||
src_buffer_list.pBuffers = flat_src_buf_array;
|
||||
src_buffer_list.numBuffers = in_page_num;
|
||||
|
||||
bytes_left = enc_len;
|
||||
data = dst_buf;
|
||||
flat_dst_buf = flat_dst_buf_array;
|
||||
while (bytes_left > 0) {
|
||||
out_page_off = ((long)data & ~PAGE_MASK);
|
||||
out_pages[out_page_num] = qat_mem_to_page(data);
|
||||
flat_dst_buf->pData = kmap(out_pages[out_page_num]) +
|
||||
out_page_off;
|
||||
flat_dst_buf->dataLenInBytes =
|
||||
min((long)PAGE_SIZE - out_page_off, (long)bytes_left);
|
||||
data += flat_dst_buf->dataLenInBytes;
|
||||
bytes_left -= flat_dst_buf->dataLenInBytes;
|
||||
flat_dst_buf++;
|
||||
out_page_num++;
|
||||
}
|
||||
dst_buffer_list.pBuffers = flat_dst_buf_array;
|
||||
dst_buffer_list.numBuffers = out_page_num;
|
||||
|
||||
op_data.sessionCtx = cy_session_ctx;
|
||||
op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL;
|
||||
op_data.cryptoStartSrcOffsetInBytes = 0;
|
||||
op_data.messageLenToCipherInBytes = 0;
|
||||
op_data.hashStartSrcOffsetInBytes = 0;
|
||||
op_data.messageLenToHashInBytes = 0;
|
||||
op_data.messageLenToCipherInBytes = enc_len;
|
||||
op_data.ivLenInBytes = ZIO_DATA_IV_LEN;
|
||||
bcopy(iv_buf, op_data.pIv, ZIO_DATA_IV_LEN);
|
||||
|
||||
cb.verify_result = CPA_FALSE;
|
||||
init_completion(&cb.complete);
|
||||
status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data,
|
||||
&src_buffer_list, &dst_buffer_list, NULL);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
if (!wait_for_completion_interruptible_timeout(&cb.complete,
|
||||
QAT_TIMEOUT_MS)) {
|
||||
status = CPA_STATUS_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (cb.verify_result == CPA_FALSE) {
|
||||
status = CPA_STATUS_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* save digest result to digest_buf */
|
||||
bcopy(op_data.pDigestResult, digest_buf, ZIO_DATA_MAC_LEN);
|
||||
if (dir == QAT_ENCRYPT)
|
||||
QAT_STAT_INCR(encrypt_total_out_bytes, enc_len);
|
||||
else
|
||||
QAT_STAT_INCR(decrypt_total_out_bytes, enc_len);
|
||||
|
||||
fail:
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
QAT_STAT_BUMP(crypt_fails);
|
||||
|
||||
for (i = 0; i < in_page_num; i++)
|
||||
kunmap(in_pages[i]);
|
||||
for (i = 0; i < out_page_num; i++)
|
||||
kunmap(out_pages[i]);
|
||||
|
||||
cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx);
|
||||
if (aad_len > 0)
|
||||
QAT_PHYS_CONTIG_FREE(op_data.pAdditionalAuthData);
|
||||
QAT_PHYS_CONTIG_FREE(op_data.pIv);
|
||||
QAT_PHYS_CONTIG_FREE(op_data.pDigestResult);
|
||||
QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData);
|
||||
QAT_PHYS_CONTIG_FREE(dst_buffer_list.pPrivateMetaData);
|
||||
QAT_PHYS_CONTIG_FREE(cy_session_ctx);
|
||||
QAT_PHYS_CONTIG_FREE(flat_src_buf_array);
|
||||
QAT_PHYS_CONTIG_FREE(flat_dst_buf_array);
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
int
|
||||
qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp)
|
||||
{
|
||||
CpaStatus status;
|
||||
Cpa16U i;
|
||||
CpaInstanceHandle cy_inst_handle;
|
||||
Cpa16U nr_bufs = (size >> PAGE_SHIFT) + 2;
|
||||
Cpa32U bytes_left = 0;
|
||||
Cpa8S *data = NULL;
|
||||
CpaCySymSessionCtx *cy_session_ctx = NULL;
|
||||
cy_callback_t cb;
|
||||
Cpa8U *digest_buffer = NULL;
|
||||
CpaCySymOpData op_data = { 0 };
|
||||
CpaBufferList src_buffer_list = { 0 };
|
||||
CpaFlatBuffer *flat_src_buf_array = NULL;
|
||||
CpaFlatBuffer *flat_src_buf = NULL;
|
||||
struct page *in_pages[MAX_PAGE_NUM];
|
||||
Cpa32U page_num = 0;
|
||||
Cpa32U page_off = 0;
|
||||
|
||||
QAT_STAT_BUMP(cksum_requests);
|
||||
QAT_STAT_INCR(cksum_total_in_bytes, size);
|
||||
|
||||
i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst;
|
||||
cy_inst_handle = cy_inst_handles[i];
|
||||
|
||||
status = qat_init_checksum_session_ctx(cy_inst_handle,
|
||||
&cy_session_ctx, cksum);
|
||||
if (status != CPA_STATUS_SUCCESS) {
|
||||
/* don't count unsupported checksums as a failure */
|
||||
if (cksum == ZIO_CHECKSUM_SHA256 ||
|
||||
cksum == ZIO_CHECKSUM_SHA512)
|
||||
QAT_STAT_BUMP(cksum_fails);
|
||||
return (status);
|
||||
}
|
||||
|
||||
/*
|
||||
* We increment nr_bufs by 2 to allow us to handle non
|
||||
* page-aligned buffer addresses and buffers whose sizes
|
||||
* are not divisible by PAGE_SIZE.
|
||||
*/
|
||||
status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs,
|
||||
&src_buffer_list, &src_buffer_list);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array,
|
||||
nr_bufs * sizeof (CpaFlatBuffer));
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
status = QAT_PHYS_CONTIG_ALLOC(&digest_buffer,
|
||||
sizeof (zio_cksum_t));
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
bytes_left = size;
|
||||
data = buf;
|
||||
flat_src_buf = flat_src_buf_array;
|
||||
while (bytes_left > 0) {
|
||||
page_off = ((long)data & ~PAGE_MASK);
|
||||
in_pages[page_num] = qat_mem_to_page(data);
|
||||
flat_src_buf->pData = kmap(in_pages[page_num]) + page_off;
|
||||
flat_src_buf->dataLenInBytes =
|
||||
min((long)PAGE_SIZE - page_off, (long)bytes_left);
|
||||
data += flat_src_buf->dataLenInBytes;
|
||||
bytes_left -= flat_src_buf->dataLenInBytes;
|
||||
flat_src_buf++;
|
||||
page_num++;
|
||||
}
|
||||
src_buffer_list.pBuffers = flat_src_buf_array;
|
||||
src_buffer_list.numBuffers = page_num;
|
||||
|
||||
op_data.sessionCtx = cy_session_ctx;
|
||||
op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL;
|
||||
op_data.hashStartSrcOffsetInBytes = 0;
|
||||
op_data.messageLenToHashInBytes = size;
|
||||
op_data.pDigestResult = digest_buffer;
|
||||
|
||||
cb.verify_result = CPA_FALSE;
|
||||
init_completion(&cb.complete);
|
||||
status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data,
|
||||
&src_buffer_list, &src_buffer_list, NULL);
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
if (!wait_for_completion_interruptible_timeout(&cb.complete,
|
||||
QAT_TIMEOUT_MS)) {
|
||||
status = CPA_STATUS_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
if (cb.verify_result == CPA_FALSE) {
|
||||
status = CPA_STATUS_FAIL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
bcopy(digest_buffer, zcp, sizeof (zio_cksum_t));
|
||||
|
||||
fail:
|
||||
if (status != CPA_STATUS_SUCCESS)
|
||||
QAT_STAT_BUMP(cksum_fails);
|
||||
|
||||
for (i = 0; i < page_num; i++)
|
||||
kunmap(in_pages[i]);
|
||||
|
||||
cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx);
|
||||
QAT_PHYS_CONTIG_FREE(digest_buffer);
|
||||
QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData);
|
||||
QAT_PHYS_CONTIG_FREE(cy_session_ctx);
|
||||
QAT_PHYS_CONTIG_FREE(flat_src_buf_array);
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
static int
|
||||
param_set_qat_encrypt(const char *val, zfs_kernel_param_t *kp)
|
||||
{
|
||||
int ret;
|
||||
int *pvalue = kp->arg;
|
||||
ret = param_set_int(val, kp);
|
||||
if (ret)
|
||||
return (ret);
|
||||
/*
|
||||
* zfs_qat_encrypt_disable = 0: enable qat encrypt
|
||||
* try to initialize qat instance if it has not been done
|
||||
*/
|
||||
if (*pvalue == 0 && !qat_cy_init_done) {
|
||||
ret = qat_cy_init();
|
||||
if (ret != 0) {
|
||||
zfs_qat_encrypt_disable = 1;
|
||||
return (ret);
|
||||
}
|
||||
}
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
param_set_qat_checksum(const char *val, zfs_kernel_param_t *kp)
|
||||
{
|
||||
int ret;
|
||||
int *pvalue = kp->arg;
|
||||
ret = param_set_int(val, kp);
|
||||
if (ret)
|
||||
return (ret);
|
||||
/*
|
||||
* set_checksum_param_ops = 0: enable qat checksum
|
||||
* try to initialize qat instance if it has not been done
|
||||
*/
|
||||
if (*pvalue == 0 && !qat_cy_init_done) {
|
||||
ret = qat_cy_init();
|
||||
if (ret != 0) {
|
||||
zfs_qat_checksum_disable = 1;
|
||||
return (ret);
|
||||
}
|
||||
}
|
||||
return (ret);
|
||||
}
|
||||
|
||||
module_param_call(zfs_qat_encrypt_disable, param_set_qat_encrypt,
|
||||
param_get_int, &zfs_qat_encrypt_disable, 0644);
|
||||
MODULE_PARM_DESC(zfs_qat_encrypt_disable, "Enable/Disable QAT encryption");
|
||||
|
||||
module_param_call(zfs_qat_checksum_disable, param_set_qat_checksum,
|
||||
param_get_int, &zfs_qat_checksum_disable, 0644);
|
||||
MODULE_PARM_DESC(zfs_qat_checksum_disable, "Enable/Disable QAT checksumming");
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,954 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* LLNL-CODE-403049.
|
||||
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/vdev_disk.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/vdev_trim.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zio.h>
|
||||
#include <linux/msdos_fs.h>
|
||||
#include <linux/vfs_compat.h>
|
||||
|
||||
char *zfs_vdev_scheduler = VDEV_SCHEDULER;
|
||||
static void *zfs_vdev_holder = VDEV_HOLDER;
|
||||
|
||||
/* size of the "reserved" partition, in blocks */
|
||||
#define EFI_MIN_RESV_SIZE (16 * 1024)
|
||||
|
||||
/*
|
||||
* Virtual device vector for disks.
|
||||
*/
|
||||
typedef struct dio_request {
|
||||
zio_t *dr_zio; /* Parent ZIO */
|
||||
atomic_t dr_ref; /* References */
|
||||
int dr_error; /* Bio error */
|
||||
int dr_bio_count; /* Count of bio's */
|
||||
struct bio *dr_bio[0]; /* Attached bio's */
|
||||
} dio_request_t;
|
||||
|
||||
|
||||
#if defined(HAVE_OPEN_BDEV_EXCLUSIVE) || defined(HAVE_BLKDEV_GET_BY_PATH)
|
||||
static fmode_t
|
||||
vdev_bdev_mode(int smode)
|
||||
{
|
||||
fmode_t mode = 0;
|
||||
|
||||
ASSERT3S(smode & (FREAD | FWRITE), !=, 0);
|
||||
|
||||
if (smode & FREAD)
|
||||
mode |= FMODE_READ;
|
||||
|
||||
if (smode & FWRITE)
|
||||
mode |= FMODE_WRITE;
|
||||
|
||||
return (mode);
|
||||
}
|
||||
#else
|
||||
static int
|
||||
vdev_bdev_mode(int smode)
|
||||
{
|
||||
int mode = 0;
|
||||
|
||||
ASSERT3S(smode & (FREAD | FWRITE), !=, 0);
|
||||
|
||||
if ((smode & FREAD) && !(smode & FWRITE))
|
||||
mode = SB_RDONLY;
|
||||
|
||||
return (mode);
|
||||
}
|
||||
#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */
|
||||
|
||||
/*
|
||||
* Returns the usable capacity (in bytes) for the partition or disk.
|
||||
*/
|
||||
static uint64_t
|
||||
bdev_capacity(struct block_device *bdev)
|
||||
{
|
||||
return (i_size_read(bdev->bd_inode));
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the maximum expansion capacity of the block device (in bytes).
|
||||
*
|
||||
* It is possible to expand a vdev when it has been created as a wholedisk
|
||||
* and the containing block device has increased in capacity. Or when the
|
||||
* partition containing the pool has been manually increased in size.
|
||||
*
|
||||
* This function is only responsible for calculating the potential expansion
|
||||
* size so it can be reported by 'zpool list'. The efi_use_whole_disk() is
|
||||
* responsible for verifying the expected partition layout in the wholedisk
|
||||
* case, and updating the partition table if appropriate. Once the partition
|
||||
* size has been increased the additional capacity will be visible using
|
||||
* bdev_capacity().
|
||||
*
|
||||
* The returned maximum expansion capacity is always expected to be larger, or
|
||||
* at the very least equal, to its usable capacity to prevent overestimating
|
||||
* the pool expandsize.
|
||||
*/
|
||||
static uint64_t
|
||||
bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk)
|
||||
{
|
||||
uint64_t psize;
|
||||
int64_t available;
|
||||
|
||||
if (wholedisk && bdev->bd_part != NULL && bdev != bdev->bd_contains) {
|
||||
/*
|
||||
* When reporting maximum expansion capacity for a wholedisk
|
||||
* deduct any capacity which is expected to be lost due to
|
||||
* alignment restrictions. Over reporting this value isn't
|
||||
* harmful and would only result in slightly less capacity
|
||||
* than expected post expansion.
|
||||
* The estimated available space may be slightly smaller than
|
||||
* bdev_capacity() for devices where the number of sectors is
|
||||
* not a multiple of the alignment size and the partition layout
|
||||
* is keeping less than PARTITION_END_ALIGNMENT bytes after the
|
||||
* "reserved" EFI partition: in such cases return the device
|
||||
* usable capacity.
|
||||
*/
|
||||
available = i_size_read(bdev->bd_contains->bd_inode) -
|
||||
((EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
|
||||
PARTITION_END_ALIGNMENT) << SECTOR_BITS);
|
||||
psize = MAX(available, bdev_capacity(bdev));
|
||||
} else {
|
||||
psize = bdev_capacity(bdev);
|
||||
}
|
||||
|
||||
return (psize);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_disk_error(zio_t *zio)
|
||||
{
|
||||
/*
|
||||
* This function can be called in interrupt context, for instance while
|
||||
* handling IRQs coming from a misbehaving disk device; use printk()
|
||||
* which is safe from any context.
|
||||
*/
|
||||
printk(KERN_WARNING "zio pool=%s vdev=%s error=%d type=%d "
|
||||
"offset=%llu size=%llu flags=%x\n", spa_name(zio->io_spa),
|
||||
zio->io_vd->vdev_path, zio->io_error, zio->io_type,
|
||||
(u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size,
|
||||
zio->io_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the Linux 'noop' elevator for zfs managed block devices. This
|
||||
* strikes the ideal balance by allowing the zfs elevator to do all
|
||||
* request ordering and prioritization. While allowing the Linux
|
||||
* elevator to do the maximum front/back merging allowed by the
|
||||
* physical device. This yields the largest possible requests for
|
||||
* the device with the lowest total overhead.
|
||||
*/
|
||||
static void
|
||||
vdev_elevator_switch(vdev_t *v, char *elevator)
|
||||
{
|
||||
vdev_disk_t *vd = v->vdev_tsd;
|
||||
struct request_queue *q;
|
||||
char *device;
|
||||
int error;
|
||||
|
||||
for (int c = 0; c < v->vdev_children; c++)
|
||||
vdev_elevator_switch(v->vdev_child[c], elevator);
|
||||
|
||||
if (!v->vdev_ops->vdev_op_leaf || vd->vd_bdev == NULL)
|
||||
return;
|
||||
|
||||
q = bdev_get_queue(vd->vd_bdev);
|
||||
device = vd->vd_bdev->bd_disk->disk_name;
|
||||
|
||||
/*
|
||||
* Skip devices which are not whole disks (partitions).
|
||||
* Device-mapper devices are excepted since they may be whole
|
||||
* disks despite the vdev_wholedisk flag, in which case we can
|
||||
* and should switch the elevator. If the device-mapper device
|
||||
* does not have an elevator (i.e. dm-raid, dm-crypt, etc.) the
|
||||
* "Skip devices without schedulers" check below will fail.
|
||||
*/
|
||||
if (!v->vdev_wholedisk && strncmp(device, "dm-", 3) != 0)
|
||||
return;
|
||||
|
||||
/* Leave existing scheduler when set to "none" */
|
||||
if ((strncmp(elevator, "none", 4) == 0) && (strlen(elevator) == 4))
|
||||
return;
|
||||
|
||||
/*
|
||||
* The elevator_change() function was available in kernels from
|
||||
* 2.6.36 to 4.11. When not available fall back to using the user
|
||||
* mode helper functionality to set the elevator via sysfs. This
|
||||
* requires /bin/echo and sysfs to be mounted which may not be true
|
||||
* early in the boot process.
|
||||
*/
|
||||
#ifdef HAVE_ELEVATOR_CHANGE
|
||||
error = elevator_change(q, elevator);
|
||||
#else
|
||||
#define SET_SCHEDULER_CMD \
|
||||
"exec 0</dev/null " \
|
||||
" 1>/sys/block/%s/queue/scheduler " \
|
||||
" 2>/dev/null; " \
|
||||
"echo %s"
|
||||
|
||||
char *argv[] = { "/bin/sh", "-c", NULL, NULL };
|
||||
char *envp[] = { NULL };
|
||||
|
||||
argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator);
|
||||
error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
|
||||
strfree(argv[2]);
|
||||
#endif /* HAVE_ELEVATOR_CHANGE */
|
||||
if (error) {
|
||||
zfs_dbgmsg("Unable to set \"%s\" scheduler for %s (%s): %d",
|
||||
elevator, v->vdev_path, device, error);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
||||
uint64_t *ashift)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
|
||||
int count = 0, block_size;
|
||||
int bdev_retry_count = 50;
|
||||
vdev_disk_t *vd;
|
||||
|
||||
/* Must have a pathname and it must be absolute. */
|
||||
if (v->vdev_path == NULL || v->vdev_path[0] != '/') {
|
||||
v->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
|
||||
vdev_dbgmsg(v, "invalid vdev_path");
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
/*
|
||||
* Reopen the device if it is currently open. When expanding a
|
||||
* partition force re-scanning the partition table while closed
|
||||
* in order to get an accurate updated block device size. Then
|
||||
* since udev may need to recreate the device links increase the
|
||||
* open retry count before reporting the device as unavailable.
|
||||
*/
|
||||
vd = v->vdev_tsd;
|
||||
if (vd) {
|
||||
char disk_name[BDEVNAME_SIZE + 6] = "/dev/";
|
||||
boolean_t reread_part = B_FALSE;
|
||||
|
||||
rw_enter(&vd->vd_lock, RW_WRITER);
|
||||
bdev = vd->vd_bdev;
|
||||
vd->vd_bdev = NULL;
|
||||
|
||||
if (bdev) {
|
||||
if (v->vdev_expanding && bdev != bdev->bd_contains) {
|
||||
bdevname(bdev->bd_contains, disk_name + 5);
|
||||
reread_part = B_TRUE;
|
||||
}
|
||||
|
||||
vdev_bdev_close(bdev, mode);
|
||||
}
|
||||
|
||||
if (reread_part) {
|
||||
bdev = vdev_bdev_open(disk_name, mode, zfs_vdev_holder);
|
||||
if (!IS_ERR(bdev)) {
|
||||
int error = vdev_bdev_reread_part(bdev);
|
||||
vdev_bdev_close(bdev, mode);
|
||||
if (error == 0)
|
||||
bdev_retry_count = 100;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
vd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
|
||||
|
||||
rw_init(&vd->vd_lock, NULL, RW_DEFAULT, NULL);
|
||||
rw_enter(&vd->vd_lock, RW_WRITER);
|
||||
}
|
||||
|
||||
/*
|
||||
* Devices are always opened by the path provided at configuration
|
||||
* time. This means that if the provided path is a udev by-id path
|
||||
* then drives may be re-cabled without an issue. If the provided
|
||||
* path is a udev by-path path, then the physical location information
|
||||
* will be preserved. This can be critical for more complicated
|
||||
* configurations where drives are located in specific physical
|
||||
* locations to maximize the systems tolerance to component failure.
|
||||
*
|
||||
* Alternatively, you can provide your own udev rule to flexibly map
|
||||
* the drives as you see fit. It is not advised that you use the
|
||||
* /dev/[hd]d devices which may be reordered due to probing order.
|
||||
* Devices in the wrong locations will be detected by the higher
|
||||
* level vdev validation.
|
||||
*
|
||||
* The specified paths may be briefly removed and recreated in
|
||||
* response to udev events. This should be exceptionally unlikely
|
||||
* because the zpool command makes every effort to verify these paths
|
||||
* have already settled prior to reaching this point. Therefore,
|
||||
* a ENOENT failure at this point is highly likely to be transient
|
||||
* and it is reasonable to sleep and retry before giving up. In
|
||||
* practice delays have been observed to be on the order of 100ms.
|
||||
*/
|
||||
bdev = ERR_PTR(-ENXIO);
|
||||
while (IS_ERR(bdev) && count < bdev_retry_count) {
|
||||
bdev = vdev_bdev_open(v->vdev_path, mode, zfs_vdev_holder);
|
||||
if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
|
||||
schedule_timeout(MSEC_TO_TICK(10));
|
||||
count++;
|
||||
} else if (IS_ERR(bdev)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (IS_ERR(bdev)) {
|
||||
int error = -PTR_ERR(bdev);
|
||||
vdev_dbgmsg(v, "open error=%d count=%d", error, count);
|
||||
vd->vd_bdev = NULL;
|
||||
v->vdev_tsd = vd;
|
||||
rw_exit(&vd->vd_lock);
|
||||
return (SET_ERROR(error));
|
||||
} else {
|
||||
vd->vd_bdev = bdev;
|
||||
v->vdev_tsd = vd;
|
||||
rw_exit(&vd->vd_lock);
|
||||
}
|
||||
|
||||
struct request_queue *q = bdev_get_queue(vd->vd_bdev);
|
||||
|
||||
/* Determine the physical block size */
|
||||
block_size = vdev_bdev_block_size(vd->vd_bdev);
|
||||
|
||||
/* Clear the nowritecache bit, causes vdev_reopen() to try again. */
|
||||
v->vdev_nowritecache = B_FALSE;
|
||||
|
||||
/* Set when device reports it supports TRIM. */
|
||||
v->vdev_has_trim = !!blk_queue_discard(q);
|
||||
|
||||
/* Set when device reports it supports secure TRIM. */
|
||||
v->vdev_has_securetrim = !!blk_queue_discard_secure(q);
|
||||
|
||||
/* Inform the ZIO pipeline that we are non-rotational */
|
||||
v->vdev_nonrot = blk_queue_nonrot(q);
|
||||
|
||||
/* Physical volume size in bytes for the partition */
|
||||
*psize = bdev_capacity(vd->vd_bdev);
|
||||
|
||||
/* Physical volume size in bytes including possible expansion space */
|
||||
*max_psize = bdev_max_capacity(vd->vd_bdev, v->vdev_wholedisk);
|
||||
|
||||
/* Based on the minimum sector size set the block size */
|
||||
*ashift = highbit64(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
|
||||
|
||||
/* Try to set the io scheduler elevator algorithm */
|
||||
(void) vdev_elevator_switch(v, zfs_vdev_scheduler);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_disk_close(vdev_t *v)
|
||||
{
|
||||
vdev_disk_t *vd = v->vdev_tsd;
|
||||
|
||||
if (v->vdev_reopening || vd == NULL)
|
||||
return;
|
||||
|
||||
if (vd->vd_bdev != NULL) {
|
||||
vdev_bdev_close(vd->vd_bdev,
|
||||
vdev_bdev_mode(spa_mode(v->vdev_spa)));
|
||||
}
|
||||
|
||||
rw_destroy(&vd->vd_lock);
|
||||
kmem_free(vd, sizeof (vdev_disk_t));
|
||||
v->vdev_tsd = NULL;
|
||||
}
|
||||
|
||||
static dio_request_t *
|
||||
vdev_disk_dio_alloc(int bio_count)
|
||||
{
|
||||
dio_request_t *dr;
|
||||
int i;
|
||||
|
||||
dr = kmem_zalloc(sizeof (dio_request_t) +
|
||||
sizeof (struct bio *) * bio_count, KM_SLEEP);
|
||||
if (dr) {
|
||||
atomic_set(&dr->dr_ref, 0);
|
||||
dr->dr_bio_count = bio_count;
|
||||
dr->dr_error = 0;
|
||||
|
||||
for (i = 0; i < dr->dr_bio_count; i++)
|
||||
dr->dr_bio[i] = NULL;
|
||||
}
|
||||
|
||||
return (dr);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_disk_dio_free(dio_request_t *dr)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dr->dr_bio_count; i++)
|
||||
if (dr->dr_bio[i])
|
||||
bio_put(dr->dr_bio[i]);
|
||||
|
||||
kmem_free(dr, sizeof (dio_request_t) +
|
||||
sizeof (struct bio *) * dr->dr_bio_count);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_disk_dio_get(dio_request_t *dr)
|
||||
{
|
||||
atomic_inc(&dr->dr_ref);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_disk_dio_put(dio_request_t *dr)
|
||||
{
|
||||
int rc = atomic_dec_return(&dr->dr_ref);
|
||||
|
||||
/*
|
||||
* Free the dio_request when the last reference is dropped and
|
||||
* ensure zio_interpret is called only once with the correct zio
|
||||
*/
|
||||
if (rc == 0) {
|
||||
zio_t *zio = dr->dr_zio;
|
||||
int error = dr->dr_error;
|
||||
|
||||
vdev_disk_dio_free(dr);
|
||||
|
||||
if (zio) {
|
||||
zio->io_error = error;
|
||||
ASSERT3S(zio->io_error, >=, 0);
|
||||
if (zio->io_error)
|
||||
vdev_disk_error(zio);
|
||||
|
||||
zio_delay_interrupt(zio);
|
||||
}
|
||||
}
|
||||
|
||||
return (rc);
|
||||
}
|
||||
|
||||
BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
|
||||
{
|
||||
dio_request_t *dr = bio->bi_private;
|
||||
int rc;
|
||||
|
||||
if (dr->dr_error == 0) {
|
||||
#ifdef HAVE_1ARG_BIO_END_IO_T
|
||||
dr->dr_error = BIO_END_IO_ERROR(bio);
|
||||
#else
|
||||
if (error)
|
||||
dr->dr_error = -(error);
|
||||
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
|
||||
dr->dr_error = EIO;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Drop reference acquired by __vdev_disk_physio */
|
||||
rc = vdev_disk_dio_put(dr);
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size)
|
||||
{
|
||||
unsigned int offset, size, i;
|
||||
struct page *page;
|
||||
|
||||
offset = offset_in_page(bio_ptr);
|
||||
for (i = 0; i < bio->bi_max_vecs; i++) {
|
||||
size = PAGE_SIZE - offset;
|
||||
|
||||
if (bio_size <= 0)
|
||||
break;
|
||||
|
||||
if (size > bio_size)
|
||||
size = bio_size;
|
||||
|
||||
if (is_vmalloc_addr(bio_ptr))
|
||||
page = vmalloc_to_page(bio_ptr);
|
||||
else
|
||||
page = virt_to_page(bio_ptr);
|
||||
|
||||
/*
|
||||
* Some network related block device uses tcp_sendpage, which
|
||||
* doesn't behave well when using 0-count page, this is a
|
||||
* safety net to catch them.
|
||||
*/
|
||||
ASSERT3S(page_count(page), >, 0);
|
||||
|
||||
if (bio_add_page(bio, page, size, offset) != size)
|
||||
break;
|
||||
|
||||
bio_ptr += size;
|
||||
bio_size -= size;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
return (bio_size);
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
bio_map_abd_off(struct bio *bio, abd_t *abd, unsigned int size, size_t off)
|
||||
{
|
||||
if (abd_is_linear(abd))
|
||||
return (bio_map(bio, ((char *)abd_to_buf(abd)) + off, size));
|
||||
|
||||
return (abd_scatter_bio_map_off(bio, abd, size, off));
|
||||
}
|
||||
|
||||
/*
 * Call submit_bio() using whichever signature the running kernel
 * provides: the one-argument form when HAVE_1ARG_SUBMIT_BIO is defined,
 * otherwise the two-argument form with a leading 0.
 */
static inline void
vdev_submit_bio_impl(struct bio *bio)
{
#ifndef HAVE_1ARG_SUBMIT_BIO
	submit_bio(0, bio);
#else
	submit_bio(bio);
#endif
}
|
||||
|
||||
#ifdef HAVE_BIO_SET_DEV
|
||||
#if defined(CONFIG_BLK_CGROUP) && defined(HAVE_BIO_SET_DEV_GPL_ONLY)
|
||||
/*
|
||||
* The Linux 5.0 kernel updated the bio_set_dev() macro so it calls the
|
||||
* GPL-only bio_associate_blkg() symbol thus inadvertently converting
|
||||
* the entire macro. Provide a minimal version which always assigns the
|
||||
* request queue's root_blkg to the bio.
|
||||
*/
|
||||
static inline void
|
||||
vdev_bio_associate_blkg(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
|
||||
ASSERT3P(q, !=, NULL);
|
||||
ASSERT3P(bio->bi_blkg, ==, NULL);
|
||||
|
||||
if (blkg_tryget(q->root_blkg))
|
||||
bio->bi_blkg = q->root_blkg;
|
||||
}
|
||||
#define bio_associate_blkg vdev_bio_associate_blkg
|
||||
#endif
|
||||
#else
|
||||
/*
|
||||
* Provide a bio_set_dev() helper macro for pre-Linux 4.14 kernels.
|
||||
*/
|
||||
static inline void
|
||||
bio_set_dev(struct bio *bio, struct block_device *bdev)
|
||||
{
|
||||
bio->bi_bdev = bdev;
|
||||
}
|
||||
#endif /* HAVE_BIO_SET_DEV */
|
||||
|
||||
static inline void
|
||||
vdev_submit_bio(struct bio *bio)
|
||||
{
|
||||
#ifdef HAVE_CURRENT_BIO_TAIL
|
||||
struct bio **bio_tail = current->bio_tail;
|
||||
current->bio_tail = NULL;
|
||||
vdev_submit_bio_impl(bio);
|
||||
current->bio_tail = bio_tail;
|
||||
#else
|
||||
struct bio_list *bio_list = current->bio_list;
|
||||
current->bio_list = NULL;
|
||||
vdev_submit_bio_impl(bio);
|
||||
current->bio_list = bio_list;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int
|
||||
__vdev_disk_physio(struct block_device *bdev, zio_t *zio,
|
||||
size_t io_size, uint64_t io_offset, int rw, int flags)
|
||||
{
|
||||
dio_request_t *dr;
|
||||
uint64_t abd_offset;
|
||||
uint64_t bio_offset;
|
||||
int bio_size, bio_count = 16;
|
||||
int i = 0, error = 0;
|
||||
#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG)
|
||||
struct blk_plug plug;
|
||||
#endif
|
||||
/*
|
||||
* Accessing outside the block device is never allowed.
|
||||
*/
|
||||
if (io_offset + io_size > bdev->bd_inode->i_size) {
|
||||
vdev_dbgmsg(zio->io_vd,
|
||||
"Illegal access %llu size %llu, device size %llu",
|
||||
io_offset, io_size, i_size_read(bdev->bd_inode));
|
||||
return (SET_ERROR(EIO));
|
||||
}
|
||||
|
||||
retry:
|
||||
dr = vdev_disk_dio_alloc(bio_count);
|
||||
if (dr == NULL)
|
||||
return (SET_ERROR(ENOMEM));
|
||||
|
||||
if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
|
||||
bio_set_flags_failfast(bdev, &flags);
|
||||
|
||||
dr->dr_zio = zio;
|
||||
|
||||
/*
|
||||
* When the IO size exceeds the maximum bio size for the request
|
||||
* queue we are forced to break the IO in multiple bio's and wait
|
||||
* for them all to complete. Ideally, all pool users will set
|
||||
* their volume block size to match the maximum request size and
|
||||
* the common case will be one bio per vdev IO request.
|
||||
*/
|
||||
|
||||
abd_offset = 0;
|
||||
bio_offset = io_offset;
|
||||
bio_size = io_size;
|
||||
for (i = 0; i <= dr->dr_bio_count; i++) {
|
||||
|
||||
/* Finished constructing bio's for given buffer */
|
||||
if (bio_size <= 0)
|
||||
break;
|
||||
|
||||
/*
|
||||
* By default only 'bio_count' bio's per dio are allowed.
|
||||
* However, if we find ourselves in a situation where more
|
||||
* are needed we allocate a larger dio and warn the user.
|
||||
*/
|
||||
if (dr->dr_bio_count == i) {
|
||||
vdev_disk_dio_free(dr);
|
||||
bio_count *= 2;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* bio_alloc() with __GFP_WAIT never returns NULL */
|
||||
dr->dr_bio[i] = bio_alloc(GFP_NOIO,
|
||||
MIN(abd_nr_pages_off(zio->io_abd, bio_size, abd_offset),
|
||||
BIO_MAX_PAGES));
|
||||
if (unlikely(dr->dr_bio[i] == NULL)) {
|
||||
vdev_disk_dio_free(dr);
|
||||
return (SET_ERROR(ENOMEM));
|
||||
}
|
||||
|
||||
/* Matching put called by vdev_disk_physio_completion */
|
||||
vdev_disk_dio_get(dr);
|
||||
|
||||
bio_set_dev(dr->dr_bio[i], bdev);
|
||||
BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9;
|
||||
dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
|
||||
dr->dr_bio[i]->bi_private = dr;
|
||||
bio_set_op_attrs(dr->dr_bio[i], rw, flags);
|
||||
|
||||
/* Remaining size is returned to become the new size */
|
||||
bio_size = bio_map_abd_off(dr->dr_bio[i], zio->io_abd,
|
||||
bio_size, abd_offset);
|
||||
|
||||
/* Advance in buffer and construct another bio if needed */
|
||||
abd_offset += BIO_BI_SIZE(dr->dr_bio[i]);
|
||||
bio_offset += BIO_BI_SIZE(dr->dr_bio[i]);
|
||||
}
|
||||
|
||||
/* Extra reference to protect dio_request during vdev_submit_bio */
|
||||
vdev_disk_dio_get(dr);
|
||||
|
||||
#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG)
|
||||
if (dr->dr_bio_count > 1)
|
||||
blk_start_plug(&plug);
|
||||
#endif
|
||||
|
||||
/* Submit all bio's associated with this dio */
|
||||
for (i = 0; i < dr->dr_bio_count; i++)
|
||||
if (dr->dr_bio[i])
|
||||
vdev_submit_bio(dr->dr_bio[i]);
|
||||
|
||||
#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG)
|
||||
if (dr->dr_bio_count > 1)
|
||||
blk_finish_plug(&plug);
|
||||
#endif
|
||||
|
||||
(void) vdev_disk_dio_put(dr);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
|
||||
{
|
||||
zio_t *zio = bio->bi_private;
|
||||
#ifdef HAVE_1ARG_BIO_END_IO_T
|
||||
zio->io_error = BIO_END_IO_ERROR(bio);
|
||||
#else
|
||||
zio->io_error = -error;
|
||||
#endif
|
||||
|
||||
if (zio->io_error && (zio->io_error == EOPNOTSUPP))
|
||||
zio->io_vd->vdev_nowritecache = B_TRUE;
|
||||
|
||||
bio_put(bio);
|
||||
ASSERT3S(zio->io_error, >=, 0);
|
||||
if (zio->io_error)
|
||||
vdev_disk_error(zio);
|
||||
zio_interrupt(zio);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct bio *bio;
|
||||
|
||||
q = bdev_get_queue(bdev);
|
||||
if (!q)
|
||||
return (SET_ERROR(ENXIO));
|
||||
|
||||
bio = bio_alloc(GFP_NOIO, 0);
|
||||
/* bio_alloc() with __GFP_WAIT never returns NULL */
|
||||
if (unlikely(bio == NULL))
|
||||
return (SET_ERROR(ENOMEM));
|
||||
|
||||
bio->bi_end_io = vdev_disk_io_flush_completion;
|
||||
bio->bi_private = zio;
|
||||
bio_set_dev(bio, bdev);
|
||||
bio_set_flush(bio);
|
||||
vdev_submit_bio(bio);
|
||||
invalidate_bdev(bdev);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_disk_io_start(zio_t *zio)
|
||||
{
|
||||
vdev_t *v = zio->io_vd;
|
||||
vdev_disk_t *vd = v->vdev_tsd;
|
||||
unsigned long trim_flags = 0;
|
||||
int rw, flags, error;
|
||||
|
||||
/*
|
||||
* If the vdev is closed, it's likely in the REMOVED or FAULTED state.
|
||||
* Nothing to be done here but return failure.
|
||||
*/
|
||||
if (vd == NULL) {
|
||||
zio->io_error = ENXIO;
|
||||
zio_interrupt(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
rw_enter(&vd->vd_lock, RW_READER);
|
||||
|
||||
/*
|
||||
* If the vdev is closed, it's likely due to a failed reopen and is
|
||||
* in the UNAVAIL state. Nothing to be done here but return failure.
|
||||
*/
|
||||
if (vd->vd_bdev == NULL) {
|
||||
rw_exit(&vd->vd_lock);
|
||||
zio->io_error = ENXIO;
|
||||
zio_interrupt(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (zio->io_type) {
|
||||
case ZIO_TYPE_IOCTL:
|
||||
|
||||
if (!vdev_readable(v)) {
|
||||
rw_exit(&vd->vd_lock);
|
||||
zio->io_error = SET_ERROR(ENXIO);
|
||||
zio_interrupt(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (zio->io_cmd) {
|
||||
case DKIOCFLUSHWRITECACHE:
|
||||
|
||||
if (zfs_nocacheflush)
|
||||
break;
|
||||
|
||||
if (v->vdev_nowritecache) {
|
||||
zio->io_error = SET_ERROR(ENOTSUP);
|
||||
break;
|
||||
}
|
||||
|
||||
error = vdev_disk_io_flush(vd->vd_bdev, zio);
|
||||
if (error == 0) {
|
||||
rw_exit(&vd->vd_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
zio->io_error = error;
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
zio->io_error = SET_ERROR(ENOTSUP);
|
||||
}
|
||||
|
||||
rw_exit(&vd->vd_lock);
|
||||
zio_execute(zio);
|
||||
return;
|
||||
case ZIO_TYPE_WRITE:
|
||||
rw = WRITE;
|
||||
#if defined(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG)
|
||||
flags = (1 << BIO_RW_UNPLUG);
|
||||
#elif defined(REQ_UNPLUG)
|
||||
flags = REQ_UNPLUG;
|
||||
#else
|
||||
flags = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case ZIO_TYPE_READ:
|
||||
rw = READ;
|
||||
#if defined(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG)
|
||||
flags = (1 << BIO_RW_UNPLUG);
|
||||
#elif defined(REQ_UNPLUG)
|
||||
flags = REQ_UNPLUG;
|
||||
#else
|
||||
flags = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case ZIO_TYPE_TRIM:
|
||||
#if defined(BLKDEV_DISCARD_SECURE)
|
||||
if (zio->io_trim_flags & ZIO_TRIM_SECURE)
|
||||
trim_flags |= BLKDEV_DISCARD_SECURE;
|
||||
#endif
|
||||
zio->io_error = -blkdev_issue_discard(vd->vd_bdev,
|
||||
zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS,
|
||||
trim_flags);
|
||||
|
||||
rw_exit(&vd->vd_lock);
|
||||
zio_interrupt(zio);
|
||||
return;
|
||||
|
||||
default:
|
||||
rw_exit(&vd->vd_lock);
|
||||
zio->io_error = SET_ERROR(ENOTSUP);
|
||||
zio_interrupt(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
zio->io_target_timestamp = zio_handle_io_delay(zio);
|
||||
error = __vdev_disk_physio(vd->vd_bdev, zio,
|
||||
zio->io_size, zio->io_offset, rw, flags);
|
||||
rw_exit(&vd->vd_lock);
|
||||
|
||||
if (error) {
|
||||
zio->io_error = error;
|
||||
zio_interrupt(zio);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_disk_io_done(zio_t *zio)
|
||||
{
|
||||
/*
|
||||
* If the device returned EIO, we revalidate the media. If it is
|
||||
* determined the media has changed this triggers the asynchronous
|
||||
* removal of the device from the configuration.
|
||||
*/
|
||||
if (zio->io_error == EIO) {
|
||||
vdev_t *v = zio->io_vd;
|
||||
vdev_disk_t *vd = v->vdev_tsd;
|
||||
|
||||
if (check_disk_change(vd->vd_bdev)) {
|
||||
vdev_bdev_invalidate(vd->vd_bdev);
|
||||
v->vdev_remove_wanted = B_TRUE;
|
||||
spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_disk_hold(vdev_t *vd)
|
||||
{
|
||||
ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
|
||||
|
||||
/* We must have a pathname, and it must be absolute. */
|
||||
if (vd->vdev_path == NULL || vd->vdev_path[0] != '/')
|
||||
return;
|
||||
|
||||
/*
|
||||
* Only prefetch path and devid info if the device has
|
||||
* never been opened.
|
||||
*/
|
||||
if (vd->vdev_tsd != NULL)
|
||||
return;
|
||||
|
||||
/* XXX: Implement me as a vnode lookup for the device */
|
||||
vd->vdev_name_vp = NULL;
|
||||
vd->vdev_devid_vp = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_disk_rele(vdev_t *vd)
|
||||
{
|
||||
ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
|
||||
|
||||
/* XXX: Implement me as a vnode rele for the device */
|
||||
}
|
||||
|
||||
static int
|
||||
param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp)
|
||||
{
|
||||
spa_t *spa = NULL;
|
||||
char *p;
|
||||
|
||||
if (val == NULL)
|
||||
return (SET_ERROR(-EINVAL));
|
||||
|
||||
if ((p = strchr(val, '\n')) != NULL)
|
||||
*p = '\0';
|
||||
|
||||
if (spa_mode_global != 0) {
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
while ((spa = spa_next(spa)) != NULL) {
|
||||
if (spa_state(spa) != POOL_STATE_ACTIVE ||
|
||||
!spa_writeable(spa) || spa_suspended(spa))
|
||||
continue;
|
||||
|
||||
spa_open_ref(spa, FTAG);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
vdev_elevator_switch(spa->spa_root_vdev, (char *)val);
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
spa_close(spa, FTAG);
|
||||
}
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
}
|
||||
|
||||
return (param_set_charp(val, kp));
|
||||
}
|
||||
|
||||
vdev_ops_t vdev_disk_ops = {
|
||||
.vdev_op_open = vdev_disk_open,
|
||||
.vdev_op_close = vdev_disk_close,
|
||||
.vdev_op_asize = vdev_default_asize,
|
||||
.vdev_op_io_start = vdev_disk_io_start,
|
||||
.vdev_op_io_done = vdev_disk_io_done,
|
||||
.vdev_op_state_change = NULL,
|
||||
.vdev_op_need_resilver = NULL,
|
||||
.vdev_op_hold = vdev_disk_hold,
|
||||
.vdev_op_rele = vdev_disk_rele,
|
||||
.vdev_op_remap = NULL,
|
||||
.vdev_op_xlate = vdev_default_xlate,
|
||||
.vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */
|
||||
.vdev_op_leaf = B_TRUE /* leaf vdev */
|
||||
};
|
||||
|
||||
module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler,
|
||||
param_get_charp, &zfs_vdev_scheduler, 0644);
|
||||
MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler");
|
||||
@@ -0,0 +1,331 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/vdev_file.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/vdev_trim.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/fm/fs/zfs.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/vnode.h>
|
||||
|
||||
/*
|
||||
* Virtual device vector for files.
|
||||
*/
|
||||
|
||||
static taskq_t *vdev_file_taskq;
|
||||
|
||||
static void
|
||||
vdev_file_hold(vdev_t *vd)
|
||||
{
|
||||
ASSERT(vd->vdev_path != NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_file_rele(vdev_t *vd)
|
||||
{
|
||||
ASSERT(vd->vdev_path != NULL);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
|
||||
uint64_t *ashift)
|
||||
{
|
||||
vdev_file_t *vf;
|
||||
vnode_t *vp;
|
||||
vattr_t vattr;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Rotational optimizations only make sense on block devices.
|
||||
*/
|
||||
vd->vdev_nonrot = B_TRUE;
|
||||
|
||||
/*
|
||||
* Allow TRIM on file based vdevs. This may not always be supported,
|
||||
* since it depends on your kernel version and underlying filesystem
|
||||
* type but it is always safe to attempt.
|
||||
*/
|
||||
vd->vdev_has_trim = B_TRUE;
|
||||
|
||||
/*
|
||||
* Disable secure TRIM on file based vdevs. There is no way to
|
||||
* request this behavior from the underlying filesystem.
|
||||
*/
|
||||
vd->vdev_has_securetrim = B_FALSE;
|
||||
|
||||
/*
|
||||
* We must have a pathname, and it must be absolute.
|
||||
*/
|
||||
if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
/*
|
||||
* Reopen the device if it's not currently open. Otherwise,
|
||||
* just update the physical size of the device.
|
||||
*/
|
||||
if (vd->vdev_tsd != NULL) {
|
||||
ASSERT(vd->vdev_reopening);
|
||||
vf = vd->vdev_tsd;
|
||||
goto skip_open;
|
||||
}
|
||||
|
||||
vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
|
||||
|
||||
/*
|
||||
* We always open the files from the root of the global zone, even if
|
||||
* we're in a local zone. If the user has gotten to this point, the
|
||||
* administrator has already decided that the pool should be available
|
||||
* to local zone users, so the underlying devices should be as well.
|
||||
*/
|
||||
ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
|
||||
error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE,
|
||||
spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1);
|
||||
|
||||
if (error) {
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
|
||||
return (error);
|
||||
}
|
||||
|
||||
vf->vf_vnode = vp;
|
||||
|
||||
#ifdef _KERNEL
|
||||
/*
|
||||
* Make sure it's a regular file.
|
||||
*/
|
||||
if (vp->v_type != VREG) {
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
|
||||
return (SET_ERROR(ENODEV));
|
||||
}
|
||||
#endif
|
||||
|
||||
skip_open:
|
||||
/*
|
||||
* Determine the physical size of the file.
|
||||
*/
|
||||
vattr.va_mask = AT_SIZE;
|
||||
error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL);
|
||||
if (error) {
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
|
||||
return (error);
|
||||
}
|
||||
|
||||
*max_psize = *psize = vattr.va_size;
|
||||
*ashift = SPA_MINBLOCKSHIFT;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_file_close(vdev_t *vd)
|
||||
{
|
||||
vdev_file_t *vf = vd->vdev_tsd;
|
||||
|
||||
if (vd->vdev_reopening || vf == NULL)
|
||||
return;
|
||||
|
||||
if (vf->vf_vnode != NULL) {
|
||||
(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL);
|
||||
(void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0,
|
||||
kcred, NULL);
|
||||
}
|
||||
|
||||
vd->vdev_delayed_close = B_FALSE;
|
||||
kmem_free(vf, sizeof (vdev_file_t));
|
||||
vd->vdev_tsd = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_file_io_strategy(void *arg)
|
||||
{
|
||||
zio_t *zio = (zio_t *)arg;
|
||||
vdev_t *vd = zio->io_vd;
|
||||
vdev_file_t *vf = vd->vdev_tsd;
|
||||
ssize_t resid;
|
||||
void *buf;
|
||||
|
||||
if (zio->io_type == ZIO_TYPE_READ)
|
||||
buf = abd_borrow_buf(zio->io_abd, zio->io_size);
|
||||
else
|
||||
buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
|
||||
|
||||
zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
|
||||
UIO_READ : UIO_WRITE, vf->vf_vnode, buf, zio->io_size,
|
||||
zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
|
||||
|
||||
if (zio->io_type == ZIO_TYPE_READ)
|
||||
abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
|
||||
else
|
||||
abd_return_buf(zio->io_abd, buf, zio->io_size);
|
||||
|
||||
if (resid != 0 && zio->io_error == 0)
|
||||
zio->io_error = SET_ERROR(ENOSPC);
|
||||
|
||||
zio_delay_interrupt(zio);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_file_io_fsync(void *arg)
|
||||
{
|
||||
zio_t *zio = (zio_t *)arg;
|
||||
vdev_file_t *vf = zio->io_vd->vdev_tsd;
|
||||
|
||||
zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL);
|
||||
|
||||
zio_interrupt(zio);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_file_io_start(zio_t *zio)
|
||||
{
|
||||
vdev_t *vd = zio->io_vd;
|
||||
vdev_file_t *vf = vd->vdev_tsd;
|
||||
|
||||
if (zio->io_type == ZIO_TYPE_IOCTL) {
|
||||
/* XXPOLICY */
|
||||
if (!vdev_readable(vd)) {
|
||||
zio->io_error = SET_ERROR(ENXIO);
|
||||
zio_interrupt(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (zio->io_cmd) {
|
||||
case DKIOCFLUSHWRITECACHE:
|
||||
|
||||
if (zfs_nocacheflush)
|
||||
break;
|
||||
|
||||
/*
|
||||
* We cannot safely call vfs_fsync() when PF_FSTRANS
|
||||
* is set in the current context. Filesystems like
|
||||
* XFS include sanity checks to verify it is not
|
||||
* already set, see xfs_vm_writepage(). Therefore
|
||||
* the sync must be dispatched to a different context.
|
||||
*/
|
||||
if (__spl_pf_fstrans_check()) {
|
||||
VERIFY3U(taskq_dispatch(vdev_file_taskq,
|
||||
vdev_file_io_fsync, zio, TQ_SLEEP), !=,
|
||||
TASKQID_INVALID);
|
||||
return;
|
||||
}
|
||||
|
||||
zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
|
||||
kcred, NULL);
|
||||
break;
|
||||
default:
|
||||
zio->io_error = SET_ERROR(ENOTSUP);
|
||||
}
|
||||
|
||||
zio_execute(zio);
|
||||
return;
|
||||
} else if (zio->io_type == ZIO_TYPE_TRIM) {
|
||||
struct flock flck;
|
||||
|
||||
ASSERT3U(zio->io_size, !=, 0);
|
||||
bzero(&flck, sizeof (flck));
|
||||
flck.l_type = F_FREESP;
|
||||
flck.l_start = zio->io_offset;
|
||||
flck.l_len = zio->io_size;
|
||||
flck.l_whence = SEEK_SET;
|
||||
|
||||
zio->io_error = VOP_SPACE(vf->vf_vnode, F_FREESP, &flck,
|
||||
0, 0, kcred, NULL);
|
||||
|
||||
zio_execute(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
zio->io_target_timestamp = zio_handle_io_delay(zio);
|
||||
|
||||
VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
|
||||
TQ_SLEEP), !=, TASKQID_INVALID);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
vdev_file_io_done(zio_t *zio)
|
||||
{
|
||||
}
|
||||
|
||||
vdev_ops_t vdev_file_ops = {
|
||||
.vdev_op_open = vdev_file_open,
|
||||
.vdev_op_close = vdev_file_close,
|
||||
.vdev_op_asize = vdev_default_asize,
|
||||
.vdev_op_io_start = vdev_file_io_start,
|
||||
.vdev_op_io_done = vdev_file_io_done,
|
||||
.vdev_op_state_change = NULL,
|
||||
.vdev_op_need_resilver = NULL,
|
||||
.vdev_op_hold = vdev_file_hold,
|
||||
.vdev_op_rele = vdev_file_rele,
|
||||
.vdev_op_remap = NULL,
|
||||
.vdev_op_xlate = vdev_default_xlate,
|
||||
.vdev_op_type = VDEV_TYPE_FILE, /* name of this vdev type */
|
||||
.vdev_op_leaf = B_TRUE /* leaf vdev */
|
||||
};
|
||||
|
||||
void
|
||||
vdev_file_init(void)
|
||||
{
|
||||
vdev_file_taskq = taskq_create("z_vdev_file", MAX(boot_ncpus, 16),
|
||||
minclsyspri, boot_ncpus, INT_MAX, TASKQ_DYNAMIC);
|
||||
|
||||
VERIFY(vdev_file_taskq);
|
||||
}
|
||||
|
||||
void
|
||||
vdev_file_fini(void)
|
||||
{
|
||||
taskq_destroy(vdev_file_taskq);
|
||||
}
|
||||
|
||||
/*
|
||||
* From userland we access disks just like files.
|
||||
*/
|
||||
#ifndef _KERNEL
|
||||
|
||||
vdev_ops_t vdev_disk_ops = {
|
||||
.vdev_op_open = vdev_file_open,
|
||||
.vdev_op_close = vdev_file_close,
|
||||
.vdev_op_asize = vdev_default_asize,
|
||||
.vdev_op_io_start = vdev_file_io_start,
|
||||
.vdev_op_io_done = vdev_file_io_done,
|
||||
.vdev_op_state_change = NULL,
|
||||
.vdev_op_need_resilver = NULL,
|
||||
.vdev_op_hold = vdev_file_hold,
|
||||
.vdev_op_rele = vdev_file_rele,
|
||||
.vdev_op_remap = NULL,
|
||||
.vdev_op_xlate = vdev_default_xlate,
|
||||
.vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */
|
||||
.vdev_op_leaf = B_TRUE /* leaf vdev */
|
||||
};
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,253 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
typedef struct zfs_dbgmsg {
|
||||
procfs_list_node_t zdm_node;
|
||||
time_t zdm_timestamp;
|
||||
int zdm_size;
|
||||
char zdm_msg[1]; /* variable length allocation */
|
||||
} zfs_dbgmsg_t;
|
||||
|
||||
procfs_list_t zfs_dbgmsgs;
|
||||
int zfs_dbgmsg_size = 0;
|
||||
int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
|
||||
|
||||
/*
|
||||
* Internal ZFS debug messages are enabled by default.
|
||||
*
|
||||
* # Print debug messages
|
||||
* cat /proc/spl/kstat/zfs/dbgmsg
|
||||
*
|
||||
* # Disable the kernel debug message log.
|
||||
* echo 0 > /sys/module/zfs/parameters/zfs_dbgmsg_enable
|
||||
*
|
||||
* # Clear the kernel debug message log.
|
||||
* echo 0 >/proc/spl/kstat/zfs/dbgmsg
|
||||
*/
|
||||
int zfs_dbgmsg_enable = 1;
|
||||
|
||||
/*
 * Print the column titles for the debug message listing.
 * Always returns 0.
 */
static int
zfs_dbgmsg_show_header(struct seq_file *f)
{
	const char *ts_title = "timestamp";
	const char *msg_title = "message";

	seq_printf(f, "%-12s %-8s\n", ts_title, msg_title);

	return (0);
}
|
||||
|
||||
static int
|
||||
zfs_dbgmsg_show(struct seq_file *f, void *p)
|
||||
{
|
||||
zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)p;
|
||||
seq_printf(f, "%-12llu %-s\n",
|
||||
(u_longlong_t)zdm->zdm_timestamp, zdm->zdm_msg);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
zfs_dbgmsg_purge(int max_size)
|
||||
{
|
||||
while (zfs_dbgmsg_size > max_size) {
|
||||
zfs_dbgmsg_t *zdm = list_remove_head(&zfs_dbgmsgs.pl_list);
|
||||
if (zdm == NULL)
|
||||
return;
|
||||
|
||||
int size = zdm->zdm_size;
|
||||
kmem_free(zdm, size);
|
||||
zfs_dbgmsg_size -= size;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_dbgmsg_clear(procfs_list_t *procfs_list)
|
||||
{
|
||||
mutex_enter(&zfs_dbgmsgs.pl_lock);
|
||||
zfs_dbgmsg_purge(0);
|
||||
mutex_exit(&zfs_dbgmsgs.pl_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_dbgmsg_init(void)
|
||||
{
|
||||
procfs_list_install("zfs",
|
||||
"dbgmsg",
|
||||
0600,
|
||||
&zfs_dbgmsgs,
|
||||
zfs_dbgmsg_show,
|
||||
zfs_dbgmsg_show_header,
|
||||
zfs_dbgmsg_clear,
|
||||
offsetof(zfs_dbgmsg_t, zdm_node));
|
||||
}
|
||||
|
||||
/*
 * Tear down the debug message log: uninstall the procfs list, free every
 * queued message (purge down to size 0) and, in kernel builds only,
 * destroy the procfs list itself.
 *
 * NOTE(review): unlike zfs_dbgmsg_clear(), the purge here runs without
 * taking zfs_dbgmsgs.pl_lock -- presumably no other users remain at this
 * point; confirm against callers.
 */
void
|
||||
zfs_dbgmsg_fini(void)
|
||||
{
|
||||
procfs_list_uninstall(&zfs_dbgmsgs);
|
||||
zfs_dbgmsg_purge(0);
|
||||
|
||||
/*
|
||||
* TODO - decide how to make this permanent
|
||||
*/
|
||||
#ifdef _KERNEL
|
||||
procfs_list_destroy(&zfs_dbgmsgs);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
__set_error(const char *file, const char *func, int line, int err)
|
||||
{
|
||||
/*
|
||||
* To enable this:
|
||||
*
|
||||
* $ echo 512 >/sys/module/zfs/parameters/zfs_flags
|
||||
*/
|
||||
if (zfs_flags & ZFS_DEBUG_SET_ERROR)
|
||||
__dprintf(B_FALSE, file, func, line, "error %lu", err);
|
||||
}
|
||||
|
||||
void
|
||||
__zfs_dbgmsg(char *buf)
|
||||
{
|
||||
int size = sizeof (zfs_dbgmsg_t) + strlen(buf);
|
||||
zfs_dbgmsg_t *zdm = kmem_zalloc(size, KM_SLEEP);
|
||||
zdm->zdm_size = size;
|
||||
zdm->zdm_timestamp = gethrestime_sec();
|
||||
strcpy(zdm->zdm_msg, buf);
|
||||
|
||||
mutex_enter(&zfs_dbgmsgs.pl_lock);
|
||||
procfs_list_add(&zfs_dbgmsgs, zdm);
|
||||
zfs_dbgmsg_size += size;
|
||||
zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
|
||||
mutex_exit(&zfs_dbgmsgs.pl_lock);
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
void
|
||||
__dprintf(boolean_t dprint, const char *file, const char *func,
|
||||
int line, const char *fmt, ...)
|
||||
{
|
||||
const char *newfile;
|
||||
va_list adx;
|
||||
size_t size;
|
||||
char *buf;
|
||||
char *nl;
|
||||
int i;
|
||||
char *prefix = (dprint) ? "dprintf: " : "";
|
||||
|
||||
size = 1024;
|
||||
buf = kmem_alloc(size, KM_SLEEP);
|
||||
|
||||
/*
|
||||
* Get rid of annoying prefix to filename.
|
||||
*/
|
||||
newfile = strrchr(file, '/');
|
||||
if (newfile != NULL) {
|
||||
newfile = newfile + 1; /* Get rid of leading / */
|
||||
} else {
|
||||
newfile = file;
|
||||
}
|
||||
|
||||
i = snprintf(buf, size, "%s%s:%d:%s(): ", prefix, newfile, line, func);
|
||||
|
||||
if (i < size) {
|
||||
va_start(adx, fmt);
|
||||
(void) vsnprintf(buf + i, size - i, fmt, adx);
|
||||
va_end(adx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get rid of trailing newline for dprintf logs.
|
||||
*/
|
||||
if (dprint && buf[0] != '\0') {
|
||||
nl = &buf[strlen(buf) - 1];
|
||||
if (*nl == '\n')
|
||||
*nl = '\0';
|
||||
}
|
||||
|
||||
/*
|
||||
* To get this data enable the zfs__dprintf trace point as shown:
|
||||
*
|
||||
* # Enable zfs__dprintf tracepoint, clear the tracepoint ring buffer
|
||||
* $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
|
||||
* $ echo 0 > /sys/kernel/debug/tracing/trace
|
||||
*
|
||||
* # Dump the ring buffer.
|
||||
* $ cat /sys/kernel/debug/tracing/trace
|
||||
*/
|
||||
DTRACE_PROBE1(zfs__dprintf, char *, buf);
|
||||
|
||||
/*
|
||||
* To get this data:
|
||||
*
|
||||
* $ cat /proc/spl/kstat/zfs/dbgmsg
|
||||
*
|
||||
* To clear the buffer:
|
||||
* $ echo 0 > /proc/spl/kstat/zfs/dbgmsg
|
||||
*/
|
||||
__zfs_dbgmsg(buf);
|
||||
|
||||
kmem_free(buf, size);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void
|
||||
zfs_dbgmsg_print(const char *tag)
|
||||
{
|
||||
ssize_t ret __attribute__((unused));
|
||||
|
||||
/*
|
||||
* We use write() in this function instead of printf()
|
||||
* so it is safe to call from a signal handler.
|
||||
*/
|
||||
ret = write(STDOUT_FILENO, "ZFS_DBGMSG(", 11);
|
||||
ret = write(STDOUT_FILENO, tag, strlen(tag));
|
||||
ret = write(STDOUT_FILENO, ") START:\n", 9);
|
||||
|
||||
mutex_enter(&zfs_dbgmsgs.pl_lock);
|
||||
for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs.pl_list); zdm != NULL;
|
||||
zdm = list_next(&zfs_dbgmsgs.pl_list, zdm)) {
|
||||
ret = write(STDOUT_FILENO, zdm->zdm_msg,
|
||||
strlen(zdm->zdm_msg));
|
||||
ret = write(STDOUT_FILENO, "\n", 1);
|
||||
}
|
||||
|
||||
ret = write(STDOUT_FILENO, "ZFS_DBGMSG(", 11);
|
||||
ret = write(STDOUT_FILENO, tag, strlen(tag));
|
||||
ret = write(STDOUT_FILENO, ") END\n", 6);
|
||||
|
||||
mutex_exit(&zfs_dbgmsgs.pl_lock);
|
||||
}
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef _KERNEL
|
||||
module_param(zfs_dbgmsg_enable, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log");
|
||||
|
||||
module_param(zfs_dbgmsg_maxsize, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_dbgmsg_maxsize, "Maximum ZFS debug log size");
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,661 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2018, 2019 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zfs_sysfs.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <linux/kobject.h>
|
||||
|
||||
#include "zfs_prop.h"
|
||||
|
||||
#if !defined(_KERNEL)
|
||||
#error kernel builds only
|
||||
#endif
|
||||
|
||||
/*
|
||||
* ZFS Module sysfs support
|
||||
*
|
||||
* This extends our sysfs '/sys/module/zfs' entry to include feature
|
||||
* and property attributes. The primary consumer of this information
|
||||
* is user processes, like the zfs CLI, that need to know what the
|
||||
* current loaded ZFS module supports. The libzfs binary will consult
|
||||
* this information when instantiating the zfs|zpool property tables
|
||||
* and the pool features table.
|
||||
*
|
||||
* The added top-level directories are:
|
||||
* /sys/module/zfs
|
||||
* ├── features.kernel
|
||||
* ├── features.pool
|
||||
* ├── properties.dataset
|
||||
* └── properties.pool
|
||||
*
|
||||
* The local interface for the zfs kobjects includes:
|
||||
* zfs_kobj_init()
|
||||
* zfs_kobj_add()
|
||||
* zfs_kobj_release()
|
||||
* zfs_kobj_add_attr()
|
||||
* zfs_kobj_fini()
|
||||
*/
|
||||
|
||||
/*
|
||||
* A zfs_mod_kobj_t represents a zfs kobject under '/sys/module/zfs'
|
||||
*/
|
||||
struct zfs_mod_kobj;
|
||||
typedef struct zfs_mod_kobj zfs_mod_kobj_t;
|
||||
|
||||
struct zfs_mod_kobj {
|
||||
struct kobject zko_kobj;
|
||||
struct kobj_type zko_kobj_type;
|
||||
struct sysfs_ops zko_sysfs_ops;
|
||||
size_t zko_attr_count;
|
||||
struct attribute *zko_attr_list; /* allocated */
|
||||
struct attribute **zko_default_attrs; /* allocated */
|
||||
size_t zko_child_count;
|
||||
zfs_mod_kobj_t *zko_children; /* allocated */
|
||||
};
|
||||
|
||||
#define ATTR_TABLE_SIZE(cnt) (sizeof (struct attribute) * (cnt))
|
||||
/* Note +1 for NULL terminator slot */
|
||||
#define DEFAULT_ATTR_SIZE(cnt) (sizeof (struct attribute *) * (cnt + 1))
|
||||
#define CHILD_TABLE_SIZE(cnt) (sizeof (zfs_mod_kobj_t) * (cnt))
|
||||
|
||||
/*
|
||||
* These are the top-level kobjects under '/sys/module/zfs/'
|
||||
*/
|
||||
static zfs_mod_kobj_t kernel_features_kobj;
|
||||
static zfs_mod_kobj_t pool_features_kobj;
|
||||
static zfs_mod_kobj_t dataset_props_kobj;
|
||||
static zfs_mod_kobj_t pool_props_kobj;
|
||||
|
||||
/*
|
||||
* The show function is used to provide the content
|
||||
* of an attribute into a PAGE_SIZE buffer.
|
||||
*/
|
||||
typedef ssize_t (*sysfs_show_func)(struct kobject *, struct attribute *,
|
||||
char *);
|
||||
|
||||
static void
|
||||
zfs_kobj_fini(zfs_mod_kobj_t *zkobj)
|
||||
{
|
||||
/* finalize any child kobjects */
|
||||
if (zkobj->zko_child_count != 0) {
|
||||
ASSERT(zkobj->zko_children);
|
||||
for (int i = 0; i < zkobj->zko_child_count; i++)
|
||||
zfs_kobj_fini(&zkobj->zko_children[i]);
|
||||
}
|
||||
|
||||
/* kobject_put() will call zfs_kobj_release() to release memory */
|
||||
kobject_del(&zkobj->zko_kobj);
|
||||
kobject_put(&zkobj->zko_kobj);
|
||||
}
|
||||
|
||||
static void
|
||||
zfs_kobj_release(struct kobject *kobj)
|
||||
{
|
||||
zfs_mod_kobj_t *zkobj = container_of(kobj, zfs_mod_kobj_t, zko_kobj);
|
||||
|
||||
if (zkobj->zko_attr_list != NULL) {
|
||||
ASSERT3S(zkobj->zko_attr_count, !=, 0);
|
||||
kmem_free(zkobj->zko_attr_list,
|
||||
ATTR_TABLE_SIZE(zkobj->zko_attr_count));
|
||||
zkobj->zko_attr_list = NULL;
|
||||
}
|
||||
|
||||
if (zkobj->zko_default_attrs != NULL) {
|
||||
kmem_free(zkobj->zko_default_attrs,
|
||||
DEFAULT_ATTR_SIZE(zkobj->zko_attr_count));
|
||||
zkobj->zko_default_attrs = NULL;
|
||||
}
|
||||
|
||||
if (zkobj->zko_child_count != 0) {
|
||||
ASSERT(zkobj->zko_children);
|
||||
|
||||
kmem_free(zkobj->zko_children,
|
||||
CHILD_TABLE_SIZE(zkobj->zko_child_count));
|
||||
zkobj->zko_child_count = 0;
|
||||
zkobj->zko_children = NULL;
|
||||
}
|
||||
|
||||
zkobj->zko_attr_count = 0;
|
||||
}
|
||||
|
||||
#ifndef sysfs_attr_init
|
||||
#define sysfs_attr_init(attr) do {} while (0)
|
||||
#endif
|
||||
|
||||
static void
|
||||
zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name)
|
||||
{
|
||||
VERIFY3U(attr_num, <, zkobj->zko_attr_count);
|
||||
ASSERT(zkobj->zko_attr_list);
|
||||
ASSERT(zkobj->zko_default_attrs);
|
||||
|
||||
zkobj->zko_attr_list[attr_num].name = attr_name;
|
||||
zkobj->zko_attr_list[attr_num].mode = 0444;
|
||||
zkobj->zko_default_attrs[attr_num] = &zkobj->zko_attr_list[attr_num];
|
||||
sysfs_attr_init(&zkobj->zko_attr_list[attr_num]);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_kobj_init(zfs_mod_kobj_t *zkobj, int attr_cnt, int child_cnt,
|
||||
sysfs_show_func show_func)
|
||||
{
|
||||
/*
|
||||
* Initialize object's attributes. Count can be zero.
|
||||
*/
|
||||
if (attr_cnt > 0) {
|
||||
zkobj->zko_attr_list = kmem_zalloc(ATTR_TABLE_SIZE(attr_cnt),
|
||||
KM_SLEEP);
|
||||
if (zkobj->zko_attr_list == NULL)
|
||||
return (ENOMEM);
|
||||
}
|
||||
/* this will always have at least one slot for NULL termination */
|
||||
zkobj->zko_default_attrs = kmem_zalloc(DEFAULT_ATTR_SIZE(attr_cnt),
|
||||
KM_SLEEP);
|
||||
if (zkobj->zko_default_attrs == NULL) {
|
||||
if (zkobj->zko_attr_list != NULL) {
|
||||
kmem_free(zkobj->zko_attr_list,
|
||||
ATTR_TABLE_SIZE(attr_cnt));
|
||||
}
|
||||
return (ENOMEM);
|
||||
}
|
||||
zkobj->zko_attr_count = attr_cnt;
|
||||
zkobj->zko_kobj_type.default_attrs = zkobj->zko_default_attrs;
|
||||
|
||||
if (child_cnt > 0) {
|
||||
zkobj->zko_children = kmem_zalloc(CHILD_TABLE_SIZE(child_cnt),
|
||||
KM_SLEEP);
|
||||
if (zkobj->zko_children == NULL) {
|
||||
if (zkobj->zko_default_attrs != NULL) {
|
||||
kmem_free(zkobj->zko_default_attrs,
|
||||
DEFAULT_ATTR_SIZE(attr_cnt));
|
||||
}
|
||||
if (zkobj->zko_attr_list != NULL) {
|
||||
kmem_free(zkobj->zko_attr_list,
|
||||
ATTR_TABLE_SIZE(attr_cnt));
|
||||
}
|
||||
return (ENOMEM);
|
||||
}
|
||||
zkobj->zko_child_count = child_cnt;
|
||||
}
|
||||
|
||||
zkobj->zko_sysfs_ops.show = show_func;
|
||||
zkobj->zko_kobj_type.sysfs_ops = &zkobj->zko_sysfs_ops;
|
||||
zkobj->zko_kobj_type.release = zfs_kobj_release;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_kobj_add(zfs_mod_kobj_t *zkobj, struct kobject *parent, const char *name)
|
||||
{
|
||||
/* zko_default_attrs must be NULL terminated */
|
||||
ASSERT(zkobj->zko_default_attrs != NULL);
|
||||
ASSERT(zkobj->zko_default_attrs[zkobj->zko_attr_count] == NULL);
|
||||
|
||||
kobject_init(&zkobj->zko_kobj, &zkobj->zko_kobj_type);
|
||||
return (kobject_add(&zkobj->zko_kobj, parent, name));
|
||||
}
|
||||
|
||||
/*
|
||||
* Each zfs property has these common attributes
|
||||
*/
|
||||
static const char *zprop_attrs[] = {
|
||||
"type",
|
||||
"readonly",
|
||||
"setonce",
|
||||
"visible",
|
||||
"values",
|
||||
"default",
|
||||
"datasets" /* zfs properties only */
|
||||
};
|
||||
|
||||
#define ZFS_PROP_ATTR_COUNT ARRAY_SIZE(zprop_attrs)
|
||||
#define ZPOOL_PROP_ATTR_COUNT (ZFS_PROP_ATTR_COUNT - 1)
|
||||
|
||||
static const char *zprop_types[] = {
|
||||
"number",
|
||||
"string",
|
||||
"index",
|
||||
};
|
||||
|
||||
typedef struct zfs_type_map {
|
||||
zfs_type_t ztm_type;
|
||||
const char *ztm_name;
|
||||
} zfs_type_map_t;
|
||||
|
||||
static zfs_type_map_t type_map[] = {
|
||||
{ZFS_TYPE_FILESYSTEM, "filesystem"},
|
||||
{ZFS_TYPE_SNAPSHOT, "snapshot"},
|
||||
{ZFS_TYPE_VOLUME, "volume"},
|
||||
{ZFS_TYPE_BOOKMARK, "bookmark"}
|
||||
};
|
||||
|
||||
/*
|
||||
* Show the content for a zfs property attribute
|
||||
*/
|
||||
static ssize_t
|
||||
zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property,
|
||||
char *buf, size_t buflen)
|
||||
{
|
||||
const char *show_str;
|
||||
char number[32];
|
||||
|
||||
/* For dataset properties list the dataset types that apply */
|
||||
if (strcmp(attr_name, "datasets") == 0 &&
|
||||
property->pd_types != ZFS_TYPE_POOL) {
|
||||
int len = 0;
|
||||
|
||||
for (int i = 0; i < ARRAY_SIZE(type_map); i++) {
|
||||
if (type_map[i].ztm_type & property->pd_types) {
|
||||
len += snprintf(buf + len, buflen - len, "%s ",
|
||||
type_map[i].ztm_name);
|
||||
}
|
||||
}
|
||||
len += snprintf(buf + len, buflen - len, "\n");
|
||||
return (len);
|
||||
}
|
||||
|
||||
if (strcmp(attr_name, "type") == 0) {
|
||||
show_str = zprop_types[property->pd_proptype];
|
||||
} else if (strcmp(attr_name, "readonly") == 0) {
|
||||
show_str = property->pd_attr == PROP_READONLY ? "1" : "0";
|
||||
} else if (strcmp(attr_name, "setonce") == 0) {
|
||||
show_str = property->pd_attr == PROP_ONETIME ? "1" : "0";
|
||||
} else if (strcmp(attr_name, "visible") == 0) {
|
||||
show_str = property->pd_visible ? "1" : "0";
|
||||
} else if (strcmp(attr_name, "values") == 0) {
|
||||
show_str = property->pd_values ? property->pd_values : "";
|
||||
} else if (strcmp(attr_name, "default") == 0) {
|
||||
switch (property->pd_proptype) {
|
||||
case PROP_TYPE_NUMBER:
|
||||
(void) snprintf(number, sizeof (number), "%llu",
|
||||
(u_longlong_t)property->pd_numdefault);
|
||||
show_str = number;
|
||||
break;
|
||||
case PROP_TYPE_STRING:
|
||||
show_str = property->pd_strdefault ?
|
||||
property->pd_strdefault : "";
|
||||
break;
|
||||
case PROP_TYPE_INDEX:
|
||||
if (zprop_index_to_string(property->pd_propnum,
|
||||
property->pd_numdefault, &show_str,
|
||||
property->pd_types) != 0) {
|
||||
show_str = "";
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return (0);
|
||||
}
|
||||
} else {
|
||||
return (0);
|
||||
}
|
||||
|
||||
return (snprintf(buf, buflen, "%s\n", show_str));
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
dataset_property_show(struct kobject *kobj, struct attribute *attr, char *buf)
|
||||
{
|
||||
zfs_prop_t prop = zfs_name_to_prop(kobject_name(kobj));
|
||||
zprop_desc_t *prop_tbl = zfs_prop_get_table();
|
||||
ssize_t len;
|
||||
|
||||
ASSERT3U(prop, <, ZFS_NUM_PROPS);
|
||||
|
||||
len = zprop_sysfs_show(attr->name, &prop_tbl[prop], buf, PAGE_SIZE);
|
||||
|
||||
return (len);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
pool_property_show(struct kobject *kobj, struct attribute *attr, char *buf)
|
||||
{
|
||||
zpool_prop_t prop = zpool_name_to_prop(kobject_name(kobj));
|
||||
zprop_desc_t *prop_tbl = zpool_prop_get_table();
|
||||
ssize_t len;
|
||||
|
||||
ASSERT3U(prop, <, ZPOOL_NUM_PROPS);
|
||||
|
||||
len = zprop_sysfs_show(attr->name, &prop_tbl[prop], buf, PAGE_SIZE);
|
||||
|
||||
return (len);
|
||||
}
|
||||
|
||||
/*
|
||||
* ZFS kernel feature attributes for '/sys/module/zfs/features.kernel'
|
||||
*
|
||||
* This list is intended for kernel features that don't have a pool feature
|
||||
* association or that extend existing user kernel interfaces.
|
||||
*
|
||||
* A user process can easily check if the running zfs kernel module
|
||||
* supports the new feature.
|
||||
*/
|
||||
static const char *zfs_kernel_features[] = {
|
||||
/* --> Add new kernel features here */
|
||||
"com.delphix:vdev_initialize",
|
||||
"org.zfsonlinux:vdev_trim",
|
||||
};
|
||||
|
||||
#define KERNEL_FEATURE_COUNT ARRAY_SIZE(zfs_kernel_features)
|
||||
|
||||
static ssize_t
|
||||
kernel_feature_show(struct kobject *kobj, struct attribute *attr, char *buf)
|
||||
{
|
||||
if (strcmp(attr->name, "supported") == 0)
|
||||
return (snprintf(buf, PAGE_SIZE, "yes\n"));
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
kernel_feature_to_kobj(zfs_mod_kobj_t *parent, int slot, const char *name)
|
||||
{
|
||||
zfs_mod_kobj_t *zfs_kobj = &parent->zko_children[slot];
|
||||
|
||||
ASSERT3U(slot, <, KERNEL_FEATURE_COUNT);
|
||||
ASSERT(name);
|
||||
|
||||
int err = zfs_kobj_init(zfs_kobj, 1, 0, kernel_feature_show);
|
||||
if (err)
|
||||
return;
|
||||
|
||||
zfs_kobj_add_attr(zfs_kobj, 0, "supported");
|
||||
|
||||
err = zfs_kobj_add(zfs_kobj, &parent->zko_kobj, name);
|
||||
if (err)
|
||||
zfs_kobj_release(&zfs_kobj->zko_kobj);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_kernel_features_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent)
|
||||
{
|
||||
/*
|
||||
* Create a parent kobject to host kernel features.
|
||||
*
|
||||
* '/sys/module/zfs/features.kernel'
|
||||
*/
|
||||
int err = zfs_kobj_init(zfs_kobj, 0, KERNEL_FEATURE_COUNT,
|
||||
kernel_feature_show);
|
||||
if (err)
|
||||
return (err);
|
||||
err = zfs_kobj_add(zfs_kobj, parent, ZFS_SYSFS_KERNEL_FEATURES);
|
||||
if (err) {
|
||||
zfs_kobj_release(&zfs_kobj->zko_kobj);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now create a kobject for each feature.
|
||||
*
|
||||
* '/sys/module/zfs/features.kernel/<feature>'
|
||||
*/
|
||||
for (int f = 0; f < KERNEL_FEATURE_COUNT; f++)
|
||||
kernel_feature_to_kobj(zfs_kobj, f, zfs_kernel_features[f]);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Each pool feature has these common attributes
|
||||
*/
|
||||
static const char *pool_feature_attrs[] = {
|
||||
"description",
|
||||
"guid",
|
||||
"uname",
|
||||
"readonly_compatible",
|
||||
"required_for_mos",
|
||||
"activate_on_enable",
|
||||
"per_dataset"
|
||||
};
|
||||
|
||||
#define ZPOOL_FEATURE_ATTR_COUNT ARRAY_SIZE(pool_feature_attrs)
|
||||
|
||||
/*
|
||||
* Show the content for the given zfs pool feature attribute
|
||||
*/
|
||||
static ssize_t
|
||||
pool_feature_show(struct kobject *kobj, struct attribute *attr, char *buf)
|
||||
{
|
||||
spa_feature_t fid;
|
||||
|
||||
if (zfeature_lookup_guid(kobject_name(kobj), &fid) != 0)
|
||||
return (0);
|
||||
|
||||
ASSERT3U(fid, <, SPA_FEATURES);
|
||||
|
||||
zfeature_flags_t flags = spa_feature_table[fid].fi_flags;
|
||||
const char *show_str = NULL;
|
||||
|
||||
if (strcmp(attr->name, "description") == 0) {
|
||||
show_str = spa_feature_table[fid].fi_desc;
|
||||
} else if (strcmp(attr->name, "guid") == 0) {
|
||||
show_str = spa_feature_table[fid].fi_guid;
|
||||
} else if (strcmp(attr->name, "uname") == 0) {
|
||||
show_str = spa_feature_table[fid].fi_uname;
|
||||
} else if (strcmp(attr->name, "readonly_compatible") == 0) {
|
||||
show_str = flags & ZFEATURE_FLAG_READONLY_COMPAT ? "1" : "0";
|
||||
} else if (strcmp(attr->name, "required_for_mos") == 0) {
|
||||
show_str = flags & ZFEATURE_FLAG_MOS ? "1" : "0";
|
||||
} else if (strcmp(attr->name, "activate_on_enable") == 0) {
|
||||
show_str = flags & ZFEATURE_FLAG_ACTIVATE_ON_ENABLE ? "1" : "0";
|
||||
} else if (strcmp(attr->name, "per_dataset") == 0) {
|
||||
show_str = flags & ZFEATURE_FLAG_PER_DATASET ? "1" : "0";
|
||||
}
|
||||
if (show_str == NULL)
|
||||
return (0);
|
||||
|
||||
return (snprintf(buf, PAGE_SIZE, "%s\n", show_str));
|
||||
}
|
||||
|
||||
static void
|
||||
pool_feature_to_kobj(zfs_mod_kobj_t *parent, spa_feature_t fid,
|
||||
const char *name)
|
||||
{
|
||||
zfs_mod_kobj_t *zfs_kobj = &parent->zko_children[fid];
|
||||
|
||||
ASSERT3U(fid, <, SPA_FEATURES);
|
||||
ASSERT(name);
|
||||
|
||||
int err = zfs_kobj_init(zfs_kobj, ZPOOL_FEATURE_ATTR_COUNT, 0,
|
||||
pool_feature_show);
|
||||
if (err)
|
||||
return;
|
||||
|
||||
for (int i = 0; i < ZPOOL_FEATURE_ATTR_COUNT; i++)
|
||||
zfs_kobj_add_attr(zfs_kobj, i, pool_feature_attrs[i]);
|
||||
|
||||
err = zfs_kobj_add(zfs_kobj, &parent->zko_kobj, name);
|
||||
if (err)
|
||||
zfs_kobj_release(&zfs_kobj->zko_kobj);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_pool_features_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent)
|
||||
{
|
||||
/*
|
||||
* Create a parent kobject to host pool features.
|
||||
*
|
||||
* '/sys/module/zfs/features.pool'
|
||||
*/
|
||||
int err = zfs_kobj_init(zfs_kobj, 0, SPA_FEATURES, pool_feature_show);
|
||||
if (err)
|
||||
return (err);
|
||||
err = zfs_kobj_add(zfs_kobj, parent, ZFS_SYSFS_POOL_FEATURES);
|
||||
if (err) {
|
||||
zfs_kobj_release(&zfs_kobj->zko_kobj);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now create a kobject for each feature.
|
||||
*
|
||||
* '/sys/module/zfs/features.pool/<feature>'
|
||||
*/
|
||||
for (spa_feature_t i = 0; i < SPA_FEATURES; i++)
|
||||
pool_feature_to_kobj(zfs_kobj, i, spa_feature_table[i].fi_guid);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
typedef struct prop_to_kobj_arg {
|
||||
zprop_desc_t *p2k_table;
|
||||
zfs_mod_kobj_t *p2k_parent;
|
||||
sysfs_show_func p2k_show_func;
|
||||
int p2k_attr_count;
|
||||
} prop_to_kobj_arg_t;
|
||||
|
||||
static int
|
||||
zprop_to_kobj(int prop, void *args)
|
||||
{
|
||||
prop_to_kobj_arg_t *data = args;
|
||||
zfs_mod_kobj_t *parent = data->p2k_parent;
|
||||
zfs_mod_kobj_t *zfs_kobj = &parent->zko_children[prop];
|
||||
const char *name = data->p2k_table[prop].pd_name;
|
||||
int err;
|
||||
|
||||
ASSERT(name);
|
||||
|
||||
err = zfs_kobj_init(zfs_kobj, data->p2k_attr_count, 0,
|
||||
data->p2k_show_func);
|
||||
if (err)
|
||||
return (ZPROP_CONT);
|
||||
|
||||
for (int i = 0; i < data->p2k_attr_count; i++)
|
||||
zfs_kobj_add_attr(zfs_kobj, i, zprop_attrs[i]);
|
||||
|
||||
err = zfs_kobj_add(zfs_kobj, &parent->zko_kobj, name);
|
||||
if (err)
|
||||
zfs_kobj_release(&zfs_kobj->zko_kobj);
|
||||
|
||||
return (ZPROP_CONT);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_sysfs_properties_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent,
|
||||
zfs_type_t type)
|
||||
{
|
||||
prop_to_kobj_arg_t context;
|
||||
const char *name;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* Create a parent kobject to host properties.
|
||||
*
|
||||
* '/sys/module/zfs/properties.<type>'
|
||||
*/
|
||||
if (type == ZFS_TYPE_POOL) {
|
||||
name = ZFS_SYSFS_POOL_PROPERTIES;
|
||||
context.p2k_table = zpool_prop_get_table();
|
||||
context.p2k_attr_count = ZPOOL_PROP_ATTR_COUNT;
|
||||
context.p2k_parent = zfs_kobj;
|
||||
context.p2k_show_func = pool_property_show;
|
||||
err = zfs_kobj_init(zfs_kobj, 0, ZPOOL_NUM_PROPS,
|
||||
pool_property_show);
|
||||
} else {
|
||||
name = ZFS_SYSFS_DATASET_PROPERTIES;
|
||||
context.p2k_table = zfs_prop_get_table();
|
||||
context.p2k_attr_count = ZFS_PROP_ATTR_COUNT;
|
||||
context.p2k_parent = zfs_kobj;
|
||||
context.p2k_show_func = dataset_property_show;
|
||||
err = zfs_kobj_init(zfs_kobj, 0, ZFS_NUM_PROPS,
|
||||
dataset_property_show);
|
||||
}
|
||||
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
err = zfs_kobj_add(zfs_kobj, parent, name);
|
||||
if (err) {
|
||||
zfs_kobj_release(&zfs_kobj->zko_kobj);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a kobject for each property.
|
||||
*
|
||||
* '/sys/module/zfs/properties.<type>/<property>'
|
||||
*/
|
||||
(void) zprop_iter_common(zprop_to_kobj, &context, B_TRUE,
|
||||
B_FALSE, type);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_sysfs_init(void)
|
||||
{
|
||||
struct kobject *parent;
|
||||
#if defined(CONFIG_ZFS) && !defined(CONFIG_ZFS_MODULE)
|
||||
parent = kobject_create_and_add("zfs", fs_kobj);
|
||||
#else
|
||||
parent = &(((struct module *)(THIS_MODULE))->mkobj).kobj;
|
||||
#endif
|
||||
int err;
|
||||
|
||||
if (parent == NULL)
|
||||
return;
|
||||
|
||||
err = zfs_kernel_features_init(&kernel_features_kobj, parent);
|
||||
if (err)
|
||||
return;
|
||||
|
||||
err = zfs_pool_features_init(&pool_features_kobj, parent);
|
||||
if (err) {
|
||||
zfs_kobj_fini(&kernel_features_kobj);
|
||||
return;
|
||||
}
|
||||
|
||||
err = zfs_sysfs_properties_init(&pool_props_kobj, parent,
|
||||
ZFS_TYPE_POOL);
|
||||
if (err) {
|
||||
zfs_kobj_fini(&kernel_features_kobj);
|
||||
zfs_kobj_fini(&pool_features_kobj);
|
||||
return;
|
||||
}
|
||||
|
||||
err = zfs_sysfs_properties_init(&dataset_props_kobj, parent,
|
||||
ZFS_TYPE_FILESYSTEM);
|
||||
if (err) {
|
||||
zfs_kobj_fini(&kernel_features_kobj);
|
||||
zfs_kobj_fini(&pool_features_kobj);
|
||||
zfs_kobj_fini(&pool_props_kobj);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
zfs_sysfs_fini(void)
|
||||
{
|
||||
/*
|
||||
* Remove top-level kobjects; each will remove any children kobjects
|
||||
*/
|
||||
zfs_kobj_fini(&kernel_features_kobj);
|
||||
zfs_kobj_fini(&pool_features_kobj);
|
||||
zfs_kobj_fini(&dataset_props_kobj);
|
||||
zfs_kobj_fini(&pool_props_kobj);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,572 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (C) 2011 Lawrence Livermore National Security, LLC.
|
||||
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
|
||||
* LLNL-CODE-403049.
|
||||
* Rewritten for Linux by:
|
||||
* Rohan Puri <rohan.puri15@gmail.com>
|
||||
* Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
*/
|
||||
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/zfs_vnops.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zfs_ctldir.h>
|
||||
#include <sys/zpl.h>
|
||||
|
||||
/*
|
||||
* Common open routine. Disallow any write access.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
zpl_common_open(struct inode *ip, struct file *filp)
|
||||
{
|
||||
if (filp->f_mode & FMODE_WRITE)
|
||||
return (-EACCES);
|
||||
|
||||
return (generic_file_open(ip, filp));
|
||||
}
|
||||
|
||||
/*
|
||||
* Get root directory contents.
|
||||
*/
|
||||
static int
|
||||
zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
|
||||
int error = 0;
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
|
||||
if (!zpl_dir_emit_dots(filp, ctx))
|
||||
goto out;
|
||||
|
||||
if (ctx->pos == 2) {
|
||||
if (!zpl_dir_emit(ctx, ZFS_SNAPDIR_NAME,
|
||||
strlen(ZFS_SNAPDIR_NAME), ZFSCTL_INO_SNAPDIR, DT_DIR))
|
||||
goto out;
|
||||
|
||||
ctx->pos++;
|
||||
}
|
||||
|
||||
if (ctx->pos == 3) {
|
||||
if (!zpl_dir_emit(ctx, ZFS_SHAREDIR_NAME,
|
||||
strlen(ZFS_SHAREDIR_NAME), ZFSCTL_INO_SHARES, DT_DIR))
|
||||
goto out;
|
||||
|
||||
ctx->pos++;
|
||||
}
|
||||
out:
|
||||
ZFS_EXIT(zfsvfs);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
|
||||
static int
|
||||
zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
||||
{
|
||||
zpl_dir_context_t ctx =
|
||||
ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
|
||||
int error;
|
||||
|
||||
error = zpl_root_iterate(filp, &ctx);
|
||||
filp->f_pos = ctx.pos;
|
||||
|
||||
return (error);
|
||||
}
|
||||
#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
|
||||
|
||||
/*
|
||||
* Get root directory attributes.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
|
||||
u32 request_mask, unsigned int query_flags)
|
||||
{
|
||||
struct inode *ip = path->dentry->d_inode;
|
||||
|
||||
generic_fillattr(ip, stat);
|
||||
stat->atime = current_time(ip);
|
||||
|
||||
return (0);
|
||||
}
|
||||
ZPL_GETATTR_WRAPPER(zpl_root_getattr);
|
||||
|
||||
static struct dentry *
|
||||
#ifdef HAVE_LOOKUP_NAMEIDATA
|
||||
zpl_root_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd)
|
||||
#else
|
||||
zpl_root_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags)
|
||||
#endif
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
struct inode *ip;
|
||||
int error;
|
||||
|
||||
crhold(cr);
|
||||
error = -zfsctl_root_lookup(dip, dname(dentry), &ip, 0, cr, NULL, NULL);
|
||||
ASSERT3S(error, <=, 0);
|
||||
crfree(cr);
|
||||
|
||||
if (error) {
|
||||
if (error == -ENOENT)
|
||||
return (d_splice_alias(NULL, dentry));
|
||||
else
|
||||
return (ERR_PTR(error));
|
||||
}
|
||||
|
||||
return (d_splice_alias(ip, dentry));
|
||||
}
|
||||
|
||||
/*
|
||||
* The '.zfs' control directory file and inode operations.
|
||||
*/
|
||||
const struct file_operations zpl_fops_root = {
|
||||
.open = zpl_common_open,
|
||||
.llseek = generic_file_llseek,
|
||||
.read = generic_read_dir,
|
||||
#ifdef HAVE_VFS_ITERATE_SHARED
|
||||
.iterate_shared = zpl_root_iterate,
|
||||
#elif defined(HAVE_VFS_ITERATE)
|
||||
.iterate = zpl_root_iterate,
|
||||
#else
|
||||
.readdir = zpl_root_readdir,
|
||||
#endif
|
||||
};
|
||||
|
||||
const struct inode_operations zpl_ops_root = {
|
||||
.lookup = zpl_root_lookup,
|
||||
.getattr = zpl_root_getattr,
|
||||
};
|
||||
|
||||
#ifdef HAVE_AUTOMOUNT
/*
 * d_automount callback for snapshot directory entries.  Requests that the
 * snapshot be mounted at 'path' and returns an ERR_PTR() on failure.
 */
static struct vfsmount *
zpl_snapdir_automount(struct path *path)
{
	int rc = -zfsctl_snapshot_mount(path, 0);

	if (rc != 0)
		return (ERR_PTR(rc));

	/*
	 * Rather than returning the new vfsmount for the snapshot we must
	 * return NULL to indicate a mount collision.  This is done because
	 * the user space mount calls do_add_mount() which adds the vfsmount
	 * to the name space.  If we returned the new mount here it would be
	 * added again to the vfsmount list resulting in list corruption.
	 */
	return (NULL);
}
#endif /* HAVE_AUTOMOUNT */
|
||||
|
||||
/*
|
||||
* Negative dentries must always be revalidated so newly created snapshots
|
||||
* can be detected and automounted. Normal dentries should be kept because
|
||||
* as of the 3.18 kernel revaliding the mountpoint dentry will result in
|
||||
* the snapshot being immediately unmounted.
|
||||
*/
|
||||
static int
|
||||
#ifdef HAVE_D_REVALIDATE_NAMEIDATA
|
||||
zpl_snapdir_revalidate(struct dentry *dentry, struct nameidata *i)
|
||||
#else
|
||||
zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags)
|
||||
#endif
|
||||
{
|
||||
return (!!dentry->d_inode);
|
||||
}
|
||||
|
||||
dentry_operations_t zpl_dops_snapdirs = {
|
||||
/*
|
||||
* Auto mounting of snapshots is only supported for 2.6.37 and
|
||||
* newer kernels. Prior to this kernel the ops->follow_link()
|
||||
* callback was used as a hack to trigger the mount. The
|
||||
* resulting vfsmount was then explicitly grafted in to the
|
||||
* name space. While it might be possible to add compatibility
|
||||
* code to accomplish this it would require considerable care.
|
||||
*/
|
||||
#ifdef HAVE_AUTOMOUNT
|
||||
.d_automount = zpl_snapdir_automount,
|
||||
#endif /* HAVE_AUTOMOUNT */
|
||||
.d_revalidate = zpl_snapdir_revalidate,
|
||||
};
|
||||
|
||||
static struct dentry *
|
||||
#ifdef HAVE_LOOKUP_NAMEIDATA
|
||||
zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
|
||||
struct nameidata *nd)
|
||||
#else
|
||||
zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
|
||||
unsigned int flags)
|
||||
#endif
|
||||
|
||||
{
|
||||
fstrans_cookie_t cookie;
|
||||
cred_t *cr = CRED();
|
||||
struct inode *ip = NULL;
|
||||
int error;
|
||||
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfsctl_snapdir_lookup(dip, dname(dentry), &ip,
|
||||
0, cr, NULL, NULL);
|
||||
ASSERT3S(error, <=, 0);
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
|
||||
if (error && error != -ENOENT)
|
||||
return (ERR_PTR(error));
|
||||
|
||||
ASSERT(error == 0 || ip == NULL);
|
||||
d_clear_d_op(dentry);
|
||||
d_set_d_op(dentry, &zpl_dops_snapdirs);
|
||||
#ifdef HAVE_AUTOMOUNT
|
||||
dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
|
||||
#endif
|
||||
|
||||
return (d_splice_alias(ip, dentry));
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
|
||||
fstrans_cookie_t cookie;
|
||||
char snapname[MAXNAMELEN];
|
||||
boolean_t case_conflict;
|
||||
uint64_t id, pos;
|
||||
int error = 0;
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
cookie = spl_fstrans_mark();
|
||||
|
||||
if (!zpl_dir_emit_dots(filp, ctx))
|
||||
goto out;
|
||||
|
||||
pos = ctx->pos;
|
||||
while (error == 0) {
|
||||
dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
|
||||
error = -dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN,
|
||||
snapname, &id, &pos, &case_conflict);
|
||||
dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
if (!zpl_dir_emit(ctx, snapname, strlen(snapname),
|
||||
ZFSCTL_INO_SHARES - id, DT_DIR))
|
||||
goto out;
|
||||
|
||||
ctx->pos = pos;
|
||||
}
|
||||
out:
|
||||
spl_fstrans_unmark(cookie);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
|
||||
if (error == -ENOENT)
|
||||
return (0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
|
||||
static int
|
||||
zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
||||
{
|
||||
zpl_dir_context_t ctx =
|
||||
ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
|
||||
int error;
|
||||
|
||||
error = zpl_snapdir_iterate(filp, &ctx);
|
||||
filp->f_pos = ctx.pos;
|
||||
|
||||
return (error);
|
||||
}
|
||||
#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
|
||||
|
||||
static int
|
||||
zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
|
||||
struct inode *tdip, struct dentry *tdentry, unsigned int flags)
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
int error;
|
||||
|
||||
/* We probably don't want to support renameat2(2) in ctldir */
|
||||
if (flags)
|
||||
return (-EINVAL);
|
||||
|
||||
crhold(cr);
|
||||
error = -zfsctl_snapdir_rename(sdip, dname(sdentry),
|
||||
tdip, dname(tdentry), cr, 0);
|
||||
ASSERT3S(error, <=, 0);
|
||||
crfree(cr);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
#ifndef HAVE_RENAME_WANTS_FLAGS
/*
 * Flag-less rename entry point, used when the kernel's rename callback
 * takes no flags argument.  Forwards to zpl_snapdir_rename2() with flags 0.
 */
static int
zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
    struct inode *tdip, struct dentry *tdentry)
{
	const unsigned int no_flags = 0;

	return (zpl_snapdir_rename2(sdip, sdentry, tdip, tdentry, no_flags));
}
#endif
|
||||
|
||||
static int
|
||||
zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry)
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
int error;
|
||||
|
||||
crhold(cr);
|
||||
error = -zfsctl_snapdir_remove(dip, dname(dentry), cr, 0);
|
||||
ASSERT3S(error, <=, 0);
|
||||
crfree(cr);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, zpl_umode_t mode)
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
vattr_t *vap;
|
||||
struct inode *ip;
|
||||
int error;
|
||||
|
||||
crhold(cr);
|
||||
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
|
||||
zpl_vap_init(vap, dip, mode | S_IFDIR, cr);
|
||||
|
||||
error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0);
|
||||
if (error == 0) {
|
||||
d_clear_d_op(dentry);
|
||||
d_set_d_op(dentry, &zpl_dops_snapdirs);
|
||||
d_instantiate(dentry, ip);
|
||||
}
|
||||
|
||||
kmem_free(vap, sizeof (vattr_t));
|
||||
ASSERT3S(error, <=, 0);
|
||||
crfree(cr);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get snapshot directory attributes.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
|
||||
u32 request_mask, unsigned int query_flags)
|
||||
{
|
||||
struct inode *ip = path->dentry->d_inode;
|
||||
zfsvfs_t *zfsvfs = ITOZSB(ip);
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
generic_fillattr(ip, stat);
|
||||
|
||||
stat->nlink = stat->size = 2;
|
||||
stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
|
||||
stat->atime = current_time(ip);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
|
||||
return (0);
|
||||
}
|
||||
ZPL_GETATTR_WRAPPER(zpl_snapdir_getattr);
|
||||
|
||||
/*
|
||||
* The '.zfs/snapshot' directory file operations. These mainly control
|
||||
* generating the list of available snapshots when doing an 'ls' in the
|
||||
* directory. See zpl_snapdir_readdir().
|
||||
*/
|
||||
const struct file_operations zpl_fops_snapdir = {
|
||||
.open = zpl_common_open,
|
||||
.llseek = generic_file_llseek,
|
||||
.read = generic_read_dir,
|
||||
#ifdef HAVE_VFS_ITERATE_SHARED
|
||||
.iterate_shared = zpl_snapdir_iterate,
|
||||
#elif defined(HAVE_VFS_ITERATE)
|
||||
.iterate = zpl_snapdir_iterate,
|
||||
#else
|
||||
.readdir = zpl_snapdir_readdir,
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
* The '.zfs/snapshot' directory inode operations. These mainly control
|
||||
* creating an inode for a snapshot directory and initializing the needed
|
||||
* infrastructure to automount the snapshot. See zpl_snapdir_lookup().
|
||||
*/
|
||||
const struct inode_operations zpl_ops_snapdir = {
|
||||
.lookup = zpl_snapdir_lookup,
|
||||
.getattr = zpl_snapdir_getattr,
|
||||
#ifdef HAVE_RENAME_WANTS_FLAGS
|
||||
.rename = zpl_snapdir_rename2,
|
||||
#else
|
||||
.rename = zpl_snapdir_rename,
|
||||
#endif
|
||||
.rmdir = zpl_snapdir_rmdir,
|
||||
.mkdir = zpl_snapdir_mkdir,
|
||||
};
|
||||
|
||||
static struct dentry *
|
||||
#ifdef HAVE_LOOKUP_NAMEIDATA
|
||||
zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
|
||||
struct nameidata *nd)
|
||||
#else
|
||||
zpl_shares_lookup(struct inode *dip, struct dentry *dentry,
|
||||
unsigned int flags)
|
||||
#endif
|
||||
{
|
||||
fstrans_cookie_t cookie;
|
||||
cred_t *cr = CRED();
|
||||
struct inode *ip = NULL;
|
||||
int error;
|
||||
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfsctl_shares_lookup(dip, dname(dentry), &ip,
|
||||
0, cr, NULL, NULL);
|
||||
ASSERT3S(error, <=, 0);
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
|
||||
if (error) {
|
||||
if (error == -ENOENT)
|
||||
return (d_splice_alias(NULL, dentry));
|
||||
else
|
||||
return (ERR_PTR(error));
|
||||
}
|
||||
|
||||
return (d_splice_alias(ip, dentry));
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx)
|
||||
{
|
||||
fstrans_cookie_t cookie;
|
||||
cred_t *cr = CRED();
|
||||
zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
|
||||
znode_t *dzp;
|
||||
int error = 0;
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
cookie = spl_fstrans_mark();
|
||||
|
||||
if (zfsvfs->z_shares_dir == 0) {
|
||||
zpl_dir_emit_dots(filp, ctx);
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
crhold(cr);
|
||||
error = -zfs_readdir(ZTOI(dzp), ctx, cr);
|
||||
crfree(cr);
|
||||
|
||||
iput(ZTOI(dzp));
|
||||
out:
|
||||
spl_fstrans_unmark(cookie);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
|
||||
static int
|
||||
zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
||||
{
|
||||
zpl_dir_context_t ctx =
|
||||
ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
|
||||
int error;
|
||||
|
||||
error = zpl_shares_iterate(filp, &ctx);
|
||||
filp->f_pos = ctx.pos;
|
||||
|
||||
return (error);
|
||||
}
|
||||
#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
|
||||
u32 request_mask, unsigned int query_flags)
|
||||
{
|
||||
struct inode *ip = path->dentry->d_inode;
|
||||
zfsvfs_t *zfsvfs = ITOZSB(ip);
|
||||
znode_t *dzp;
|
||||
int error;
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
|
||||
if (zfsvfs->z_shares_dir == 0) {
|
||||
generic_fillattr(path->dentry->d_inode, stat);
|
||||
stat->nlink = stat->size = 2;
|
||||
stat->atime = current_time(ip);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (0);
|
||||
}
|
||||
|
||||
error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
|
||||
if (error == 0) {
|
||||
error = -zfs_getattr_fast(ZTOI(dzp), stat);
|
||||
iput(ZTOI(dzp));
|
||||
}
|
||||
|
||||
ZFS_EXIT(zfsvfs);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
ZPL_GETATTR_WRAPPER(zpl_shares_getattr);
|
||||
|
||||
/*
|
||||
* The '.zfs/shares' directory file operations.
|
||||
*/
|
||||
const struct file_operations zpl_fops_shares = {
|
||||
.open = zpl_common_open,
|
||||
.llseek = generic_file_llseek,
|
||||
.read = generic_read_dir,
|
||||
#ifdef HAVE_VFS_ITERATE_SHARED
|
||||
.iterate_shared = zpl_shares_iterate,
|
||||
#elif defined(HAVE_VFS_ITERATE)
|
||||
.iterate = zpl_shares_iterate,
|
||||
#else
|
||||
.readdir = zpl_shares_readdir,
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
* The '.zfs/shares' directory inode operations.
|
||||
*/
|
||||
const struct inode_operations zpl_ops_shares = {
|
||||
.lookup = zpl_shares_lookup,
|
||||
.getattr = zpl_shares_getattr,
|
||||
};
|
||||
@@ -0,0 +1,177 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2011 Gunnar Beutner
|
||||
* Copyright (c) 2012 Cyril Plisko. All rights reserved.
|
||||
*/
|
||||
|
||||
|
||||
#include <sys/zfs_vnops.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zfs_ctldir.h>
|
||||
#include <sys/zpl.h>
|
||||
|
||||
|
||||
static int
|
||||
#ifdef HAVE_ENCODE_FH_WITH_INODE
|
||||
zpl_encode_fh(struct inode *ip, __u32 *fh, int *max_len, struct inode *parent)
|
||||
{
|
||||
#else
|
||||
zpl_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable)
|
||||
{
|
||||
/* CSTYLED */
|
||||
struct inode *ip = dentry->d_inode;
|
||||
#endif /* HAVE_ENCODE_FH_WITH_INODE */
|
||||
fstrans_cookie_t cookie;
|
||||
fid_t *fid = (fid_t *)fh;
|
||||
int len_bytes, rc;
|
||||
|
||||
len_bytes = *max_len * sizeof (__u32);
|
||||
|
||||
if (len_bytes < offsetof(fid_t, fid_data))
|
||||
return (255);
|
||||
|
||||
fid->fid_len = len_bytes - offsetof(fid_t, fid_data);
|
||||
cookie = spl_fstrans_mark();
|
||||
|
||||
if (zfsctl_is_node(ip))
|
||||
rc = zfsctl_fid(ip, fid);
|
||||
else
|
||||
rc = zfs_fid(ip, fid);
|
||||
|
||||
spl_fstrans_unmark(cookie);
|
||||
len_bytes = offsetof(fid_t, fid_data) + fid->fid_len;
|
||||
*max_len = roundup(len_bytes, sizeof (__u32)) / sizeof (__u32);
|
||||
|
||||
return (rc == 0 ? FILEID_INO32_GEN : 255);
|
||||
}
|
||||
|
||||
static struct dentry *
|
||||
zpl_dentry_obtain_alias(struct inode *ip)
|
||||
{
|
||||
struct dentry *result;
|
||||
|
||||
#ifdef HAVE_D_OBTAIN_ALIAS
|
||||
result = d_obtain_alias(ip);
|
||||
#else
|
||||
result = d_alloc_anon(ip);
|
||||
|
||||
if (result == NULL) {
|
||||
iput(ip);
|
||||
result = ERR_PTR(-ENOMEM);
|
||||
}
|
||||
#endif /* HAVE_D_OBTAIN_ALIAS */
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
static struct dentry *
|
||||
zpl_fh_to_dentry(struct super_block *sb, struct fid *fh,
|
||||
int fh_len, int fh_type)
|
||||
{
|
||||
fid_t *fid = (fid_t *)fh;
|
||||
fstrans_cookie_t cookie;
|
||||
struct inode *ip;
|
||||
int len_bytes, rc;
|
||||
|
||||
len_bytes = fh_len * sizeof (__u32);
|
||||
|
||||
if (fh_type != FILEID_INO32_GEN ||
|
||||
len_bytes < offsetof(fid_t, fid_data) ||
|
||||
len_bytes < offsetof(fid_t, fid_data) + fid->fid_len)
|
||||
return (ERR_PTR(-EINVAL));
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
rc = zfs_vget(sb, &ip, fid);
|
||||
spl_fstrans_unmark(cookie);
|
||||
|
||||
if (rc) {
|
||||
/*
|
||||
* If we see ENOENT it might mean that an NFSv4 * client
|
||||
* is using a cached inode value in a file handle and
|
||||
* that the sought after file has had its inode changed
|
||||
* by a third party. So change the error to ESTALE
|
||||
* which will trigger a full lookup by the client and
|
||||
* will find the new filename/inode pair if it still
|
||||
* exists.
|
||||
*/
|
||||
if (rc == ENOENT)
|
||||
rc = ESTALE;
|
||||
|
||||
return (ERR_PTR(-rc));
|
||||
}
|
||||
|
||||
ASSERT((ip != NULL) && !IS_ERR(ip));
|
||||
|
||||
return (zpl_dentry_obtain_alias(ip));
|
||||
}
|
||||
|
||||
static struct dentry *
|
||||
zpl_get_parent(struct dentry *child)
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
fstrans_cookie_t cookie;
|
||||
struct inode *ip;
|
||||
int error;
|
||||
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_lookup(child->d_inode, "..", &ip, 0, cr, NULL, NULL);
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
if (error)
|
||||
return (ERR_PTR(error));
|
||||
|
||||
return (zpl_dentry_obtain_alias(ip));
|
||||
}
|
||||
|
||||
#ifdef HAVE_COMMIT_METADATA
/*
 * commit_metadata export operation.  Control-directory inodes need no
 * work and report success immediately; every other inode is synced with
 * zfs_fsync().  Returns 0 or a negative errno.
 */
static int
zpl_commit_metadata(struct inode *inode)
{
	cred_t *cr = CRED();
	fstrans_cookie_t fst;
	int rc;

	if (zfsctl_is_node(inode))
		return (0);

	crhold(cr);
	fst = spl_fstrans_mark();
	rc = -zfs_fsync(inode, 0, cr);
	spl_fstrans_unmark(fst);
	crfree(cr);
	ASSERT3S(rc, <=, 0);

	return (rc);
}
#endif /* HAVE_COMMIT_METADATA */
|
||||
|
||||
const struct export_operations zpl_export_operations = {
|
||||
.encode_fh = zpl_encode_fh,
|
||||
.fh_to_dentry = zpl_fh_to_dentry,
|
||||
.get_parent = zpl_get_parent,
|
||||
#ifdef HAVE_COMMIT_METADATA
|
||||
.commit_metadata = zpl_commit_metadata,
|
||||
#endif /* HAVE_COMMIT_METADATA */
|
||||
};
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,826 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
|
||||
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
|
||||
*/
|
||||
|
||||
|
||||
#include <sys/zfs_ctldir.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/zfs_vnops.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/zpl.h>
|
||||
#include <sys/file.h>
|
||||
|
||||
|
||||
static struct dentry *
|
||||
#ifdef HAVE_LOOKUP_NAMEIDATA
|
||||
zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
|
||||
#else
|
||||
zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
|
||||
#endif
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
struct inode *ip;
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
pathname_t *ppn = NULL;
|
||||
pathname_t pn;
|
||||
int zfs_flags = 0;
|
||||
zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
|
||||
|
||||
if (dlen(dentry) >= ZAP_MAXNAMELEN)
|
||||
return (ERR_PTR(-ENAMETOOLONG));
|
||||
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
|
||||
/* If we are a case insensitive fs, we need the real name */
|
||||
if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
|
||||
zfs_flags = FIGNORECASE;
|
||||
pn_alloc(&pn);
|
||||
ppn = &pn;
|
||||
}
|
||||
|
||||
error = -zfs_lookup(dir, dname(dentry), &ip, zfs_flags, cr, NULL, ppn);
|
||||
spl_fstrans_unmark(cookie);
|
||||
ASSERT3S(error, <=, 0);
|
||||
crfree(cr);
|
||||
|
||||
spin_lock(&dentry->d_lock);
|
||||
dentry->d_time = jiffies;
|
||||
#ifndef HAVE_S_D_OP
|
||||
d_set_d_op(dentry, &zpl_dentry_operations);
|
||||
#endif /* HAVE_S_D_OP */
|
||||
spin_unlock(&dentry->d_lock);
|
||||
|
||||
if (error) {
|
||||
/*
|
||||
* If we have a case sensitive fs, we do not want to
|
||||
* insert negative entries, so return NULL for ENOENT.
|
||||
* Fall through if the error is not ENOENT. Also free memory.
|
||||
*/
|
||||
if (ppn) {
|
||||
pn_free(ppn);
|
||||
if (error == -ENOENT)
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
if (error == -ENOENT)
|
||||
return (d_splice_alias(NULL, dentry));
|
||||
else
|
||||
return (ERR_PTR(error));
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are case insensitive, call the correct function
|
||||
* to install the name.
|
||||
*/
|
||||
if (ppn) {
|
||||
struct dentry *new_dentry;
|
||||
struct qstr ci_name;
|
||||
|
||||
if (strcmp(dname(dentry), pn.pn_buf) == 0) {
|
||||
new_dentry = d_splice_alias(ip, dentry);
|
||||
} else {
|
||||
ci_name.name = pn.pn_buf;
|
||||
ci_name.len = strlen(pn.pn_buf);
|
||||
new_dentry = d_add_ci(dentry, ip, &ci_name);
|
||||
}
|
||||
pn_free(ppn);
|
||||
return (new_dentry);
|
||||
} else {
|
||||
return (d_splice_alias(ip, dentry));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
zpl_vap_init(vattr_t *vap, struct inode *dir, zpl_umode_t mode, cred_t *cr)
|
||||
{
|
||||
vap->va_mask = ATTR_MODE;
|
||||
vap->va_mode = mode;
|
||||
vap->va_uid = crgetfsuid(cr);
|
||||
|
||||
if (dir && dir->i_mode & S_ISGID) {
|
||||
vap->va_gid = KGID_TO_SGID(dir->i_gid);
|
||||
if (S_ISDIR(mode))
|
||||
vap->va_mode |= S_ISGID;
|
||||
} else {
|
||||
vap->va_gid = crgetfsgid(cr);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
#ifdef HAVE_CREATE_NAMEIDATA
|
||||
zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
|
||||
struct nameidata *nd)
|
||||
#else
|
||||
zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
|
||||
bool flag)
|
||||
#endif
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
struct inode *ip;
|
||||
vattr_t *vap;
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
crhold(cr);
|
||||
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
|
||||
zpl_vap_init(vap, dir, mode, cr);
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL);
|
||||
if (error == 0) {
|
||||
d_instantiate(dentry, ip);
|
||||
|
||||
error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
|
||||
if (error == 0)
|
||||
error = zpl_init_acl(ip, dir);
|
||||
|
||||
if (error)
|
||||
(void) zfs_remove(dir, dname(dentry), cr, 0);
|
||||
}
|
||||
|
||||
spl_fstrans_unmark(cookie);
|
||||
kmem_free(vap, sizeof (vattr_t));
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
|
||||
dev_t rdev)
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
struct inode *ip;
|
||||
vattr_t *vap;
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
/*
|
||||
* We currently expect Linux to supply rdev=0 for all sockets
|
||||
* and fifos, but we want to know if this behavior ever changes.
|
||||
*/
|
||||
if (S_ISSOCK(mode) || S_ISFIFO(mode))
|
||||
ASSERT(rdev == 0);
|
||||
|
||||
crhold(cr);
|
||||
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
|
||||
zpl_vap_init(vap, dir, mode, cr);
|
||||
vap->va_rdev = rdev;
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL);
|
||||
if (error == 0) {
|
||||
d_instantiate(dentry, ip);
|
||||
|
||||
error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
|
||||
if (error == 0)
|
||||
error = zpl_init_acl(ip, dir);
|
||||
|
||||
if (error)
|
||||
(void) zfs_remove(dir, dname(dentry), cr, 0);
|
||||
}
|
||||
|
||||
spl_fstrans_unmark(cookie);
|
||||
kmem_free(vap, sizeof (vattr_t));
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
#ifdef HAVE_TMPFILE
/*
 * Create an unnamed temporary file in 'dir' and attach it to 'dentry'.
 *
 * After zfs_tmpfile() succeeds the link count is set to 1 before calling
 * d_tmpfile(), then the security xattr and ACL are initialized.  A failure
 * of that initialization is returned to the caller but nothing is undone
 * here.  Returns 0 or a negative errno.
 */
static int
zpl_tmpfile(struct inode *dir, struct dentry *dentry, zpl_umode_t mode)
{
	cred_t *cr = CRED();
	struct inode *tmp_ip;
	vattr_t *attrs;
	int rc;
	fstrans_cookie_t fst;

	crhold(cr);
	attrs = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
	zpl_vap_init(attrs, dir, mode, cr);

	fst = spl_fstrans_mark();
	rc = -zfs_tmpfile(dir, attrs, 0, mode, &tmp_ip, cr, 0, NULL);
	if (rc != 0)
		goto out;

	/* d_tmpfile will do drop_nlink, so we should set it first */
	set_nlink(tmp_ip, 1);
	d_tmpfile(dentry, tmp_ip);

	rc = zpl_xattr_security_init(tmp_ip, dir, &dentry->d_name);
	if (rc == 0)
		rc = zpl_init_acl(tmp_ip, dir);
	/*
	 * don't need to handle error here, file is already in
	 * unlinked set.
	 */

out:
	spl_fstrans_unmark(fst);
	kmem_free(attrs, sizeof (vattr_t));
	crfree(cr);
	ASSERT3S(rc, <=, 0);

	return (rc);
}
#endif
|
||||
|
||||
static int
|
||||
zpl_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
|
||||
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_remove(dir, dname(dentry), cr, 0);
|
||||
|
||||
/*
|
||||
* For a CI FS we must invalidate the dentry to prevent the
|
||||
* creation of negative entries.
|
||||
*/
|
||||
if (error == 0 && zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
|
||||
d_invalidate(dentry);
|
||||
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_mkdir(struct inode *dir, struct dentry *dentry, zpl_umode_t mode)
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
vattr_t *vap;
|
||||
struct inode *ip;
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
crhold(cr);
|
||||
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
|
||||
zpl_vap_init(vap, dir, mode | S_IFDIR, cr);
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL);
|
||||
if (error == 0) {
|
||||
d_instantiate(dentry, ip);
|
||||
|
||||
error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
|
||||
if (error == 0)
|
||||
error = zpl_init_acl(ip, dir);
|
||||
|
||||
if (error)
|
||||
(void) zfs_rmdir(dir, dname(dentry), NULL, cr, 0);
|
||||
}
|
||||
|
||||
spl_fstrans_unmark(cookie);
|
||||
kmem_free(vap, sizeof (vattr_t));
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
|
||||
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0);
|
||||
|
||||
/*
|
||||
* For a CI FS we must invalidate the dentry to prevent the
|
||||
* creation of negative entries.
|
||||
*/
|
||||
if (error == 0 && zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
|
||||
d_invalidate(dentry);
|
||||
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
|
||||
unsigned int query_flags)
|
||||
{
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
|
||||
/*
|
||||
* XXX request_mask and query_flags currently ignored.
|
||||
*/
|
||||
|
||||
error = -zfs_getattr_fast(path->dentry->d_inode, stat);
|
||||
spl_fstrans_unmark(cookie);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
ZPL_GETATTR_WRAPPER(zpl_getattr);
|
||||
|
||||
static int
|
||||
zpl_setattr(struct dentry *dentry, struct iattr *ia)
|
||||
{
|
||||
struct inode *ip = dentry->d_inode;
|
||||
cred_t *cr = CRED();
|
||||
vattr_t *vap;
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
error = setattr_prepare(dentry, ia);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
crhold(cr);
|
||||
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
|
||||
vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK;
|
||||
vap->va_mode = ia->ia_mode;
|
||||
vap->va_uid = KUID_TO_SUID(ia->ia_uid);
|
||||
vap->va_gid = KGID_TO_SGID(ia->ia_gid);
|
||||
vap->va_size = ia->ia_size;
|
||||
vap->va_atime = ia->ia_atime;
|
||||
vap->va_mtime = ia->ia_mtime;
|
||||
vap->va_ctime = ia->ia_ctime;
|
||||
|
||||
if (vap->va_mask & ATTR_ATIME) {
|
||||
ip->i_atime = zpl_inode_timespec_trunc(ia->ia_atime,
|
||||
ip->i_sb->s_time_gran);
|
||||
}
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_setattr(ip, vap, 0, cr);
|
||||
if (!error && (ia->ia_valid & ATTR_MODE))
|
||||
error = zpl_chmod_acl(ip);
|
||||
|
||||
spl_fstrans_unmark(cookie);
|
||||
kmem_free(vap, sizeof (vattr_t));
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_rename2(struct inode *sdip, struct dentry *sdentry,
|
||||
struct inode *tdip, struct dentry *tdentry, unsigned int flags)
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
/* We don't have renameat2(2) support */
|
||||
if (flags)
|
||||
return (-EINVAL);
|
||||
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0);
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
#ifndef HAVE_RENAME_WANTS_FLAGS
/*
 * Flag-less rename entry point, used when the kernel's rename callback
 * takes no flags argument.  Forwards to zpl_rename2() with flags 0.
 */
static int
zpl_rename(struct inode *sdip, struct dentry *sdentry,
    struct inode *tdip, struct dentry *tdentry)
{
	const unsigned int no_flags = 0;

	return (zpl_rename2(sdip, sdentry, tdip, tdentry, no_flags));
}
#endif
|
||||
|
||||
static int
|
||||
zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
vattr_t *vap;
|
||||
struct inode *ip;
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
crhold(cr);
|
||||
vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
|
||||
zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr);
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0);
|
||||
if (error == 0) {
|
||||
d_instantiate(dentry, ip);
|
||||
|
||||
error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
|
||||
if (error)
|
||||
(void) zfs_remove(dir, dname(dentry), cr, 0);
|
||||
}
|
||||
|
||||
spl_fstrans_unmark(cookie);
|
||||
kmem_free(vap, sizeof (vattr_t));
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
 * Release a symlink target buffer.  Every variant frees MAXPATHLEN bytes,
 * the size allocated by zpl_get_link_common(); only the callback signature
 * differs between kernel interfaces.
 */
#if defined(HAVE_PUT_LINK_COOKIE)
static void
zpl_put_link(struct inode *unused, void *cookie)
{
	/* The cookie is the link buffer itself. */
	kmem_free(cookie, MAXPATHLEN);
}
#elif defined(HAVE_PUT_LINK_NAMEIDATA)
static void
zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
{
	const char *target = nd_get_link(nd);

	/* An error pointer stored in the nameidata is not a buffer to free. */
	if (IS_ERR(target))
		return;

	kmem_free(target, MAXPATHLEN);
}
#elif defined(HAVE_PUT_LINK_DELAYED)
static void
zpl_put_link(void *ptr)
{
	/* Delayed-call argument is the link buffer itself. */
	kmem_free(ptr, MAXPATHLEN);
}
#endif
|
||||
|
||||
static int
|
||||
zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link)
|
||||
{
|
||||
fstrans_cookie_t cookie;
|
||||
cred_t *cr = CRED();
|
||||
struct iovec iov;
|
||||
uio_t uio = { { 0 }, 0 };
|
||||
int error;
|
||||
|
||||
crhold(cr);
|
||||
*link = NULL;
|
||||
iov.iov_len = MAXPATHLEN;
|
||||
iov.iov_base = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
|
||||
|
||||
uio.uio_iov = &iov;
|
||||
uio.uio_iovcnt = 1;
|
||||
uio.uio_segflg = UIO_SYSSPACE;
|
||||
uio.uio_resid = (MAXPATHLEN - 1);
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_readlink(ip, &uio, cr);
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
|
||||
if (error)
|
||||
kmem_free(iov.iov_base, MAXPATHLEN);
|
||||
else
|
||||
*link = iov.iov_base;
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
 * Symlink resolution entry points.  Exactly one variant is compiled,
 * selected by the HAVE_GET_LINK_* / HAVE_FOLLOW_LINK_* macros.  Each one
 * obtains the link target from zpl_get_link_common() and differs only in
 * how the buffer (or the error) is handed back.
 */
#if defined(HAVE_GET_LINK_DELAYED)
/*
 * Returns the link target and registers zpl_put_link() on 'done' to free
 * it later.  A NULL dentry yields ERR_PTR(-ECHILD); any other failure
 * yields ERR_PTR(error).
 */
const char *
zpl_get_link(struct dentry *dentry, struct inode *inode,
    struct delayed_call *done)
{
	char *target = NULL;
	int error;

	if (dentry == NULL)
		return (ERR_PTR(-ECHILD));

	error = zpl_get_link_common(dentry, inode, &target);
	if (error != 0)
		return (ERR_PTR(error));

	set_delayed_call(done, zpl_put_link, target);

	return (target);
}
#elif defined(HAVE_GET_LINK_COOKIE)
/*
 * Returns the link target and also stores it in *cookie.  A NULL dentry
 * yields ERR_PTR(-ECHILD); any other failure yields ERR_PTR(error) and
 * leaves *cookie untouched.
 */
const char *
zpl_get_link(struct dentry *dentry, struct inode *inode, void **cookie)
{
	char *target = NULL;
	int error;

	if (dentry == NULL)
		return (ERR_PTR(-ECHILD));

	error = zpl_get_link_common(dentry, inode, &target);
	if (error != 0)
		return (ERR_PTR(error));

	*cookie = target;

	return (target);
}
#elif defined(HAVE_FOLLOW_LINK_COOKIE)
/*
 * Returns the link target of dentry->d_inode and also stores it in
 * *cookie, or returns ERR_PTR(error) leaving *cookie untouched.
 */
const char *
zpl_follow_link(struct dentry *dentry, void **cookie)
{
	char *target = NULL;
	int error;

	error = zpl_get_link_common(dentry, dentry->d_inode, &target);
	if (error != 0)
		return (ERR_PTR(error));

	*cookie = target;

	return (target);
}
#elif defined(HAVE_FOLLOW_LINK_NAMEIDATA)
/*
 * Publishes either the link target of dentry->d_inode or ERR_PTR(error)
 * through nd_set_link(); the function itself always returns NULL.
 */
static void *
zpl_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	char *target = NULL;
	int error;

	error = zpl_get_link_common(dentry, dentry->d_inode, &target);
	if (error == 0)
		nd_set_link(nd, target);
	else
		nd_set_link(nd, ERR_PTR(error));

	return (NULL);
}
#endif
|
||||
|
||||
static int
|
||||
zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
cred_t *cr = CRED();
|
||||
struct inode *ip = old_dentry->d_inode;
|
||||
int error;
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
if (ip->i_nlink >= ZFS_LINK_MAX)
|
||||
return (-EMLINK);
|
||||
|
||||
crhold(cr);
|
||||
ip->i_ctime = current_time(ip);
|
||||
igrab(ip); /* Use ihold() if available */
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_link(dir, ip, dname(dentry), cr, 0);
|
||||
if (error) {
|
||||
iput(ip);
|
||||
goto out;
|
||||
}
|
||||
|
||||
d_instantiate(dentry, ip);
|
||||
out:
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
#ifdef HAVE_INODE_TRUNCATE_RANGE
/*
 * Free the byte range [start, end) of inode 'ip' by issuing an F_FREESP
 * request through zfs_space().  The result of zfs_space() is not
 * reported, since this callback returns void.
 */
static void
zpl_truncate_range(struct inode *ip, loff_t start, loff_t end)
{
	cred_t *cr = CRED();
	flock64_t bf;
	fstrans_cookie_t cookie;

	ASSERT3S(start, <=, end);

	/*
	 * An empty range must be rejected up front: zfs_freesp() treats
	 * (len == 0) as "truncate until the end of the file", which is
	 * not what was asked for.
	 */
	if (start == end)
		return;

	crhold(cr);

	/* Describe the region to free, measured from the start of file. */
	bf.l_whence = SEEK_SET;
	bf.l_start = start;
	bf.l_len = end - start;
	bf.l_type = F_WRLCK;
	bf.l_pid = 0;

	cookie = spl_fstrans_mark();
	zfs_space(ip, F_FREESP, &bf, FWRITE, start, cr);
	spl_fstrans_unmark(cookie);

	crfree(cr);
}
#endif /* HAVE_INODE_TRUNCATE_RANGE */
|
||||
|
||||
#ifdef HAVE_INODE_FALLOCATE
/*
 * Inode-level fallocate callback.  All of the work is delegated to
 * zpl_fallocate_common(), whose result is returned unchanged.
 */
static long
zpl_fallocate(struct inode *ip, int mode, loff_t offset, loff_t len)
{
	return (zpl_fallocate_common(ip, mode, offset, len));
}
#endif /* HAVE_INODE_FALLOCATE */
|
||||
|
||||
static int
|
||||
#ifdef HAVE_D_REVALIDATE_NAMEIDATA
|
||||
zpl_revalidate(struct dentry *dentry, struct nameidata *nd)
|
||||
{
|
||||
unsigned int flags = (nd ? nd->flags : 0);
|
||||
#else
|
||||
zpl_revalidate(struct dentry *dentry, unsigned int flags)
|
||||
{
|
||||
#endif /* HAVE_D_REVALIDATE_NAMEIDATA */
|
||||
/* CSTYLED */
|
||||
zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
|
||||
int error;
|
||||
|
||||
if (flags & LOOKUP_RCU)
|
||||
return (-ECHILD);
|
||||
|
||||
/*
|
||||
* Automounted snapshots rely on periodic dentry revalidation
|
||||
* to defer snapshots from being automatically unmounted.
|
||||
*/
|
||||
if (zfsvfs->z_issnap) {
|
||||
if (time_after(jiffies, zfsvfs->z_snap_defer_time +
|
||||
MAX(zfs_expire_snapshot * HZ / 2, HZ))) {
|
||||
zfsvfs->z_snap_defer_time = jiffies;
|
||||
zfsctl_snapshot_unmount_delay(zfsvfs->z_os->os_spa,
|
||||
dmu_objset_id(zfsvfs->z_os), zfs_expire_snapshot);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* After a rollback negative dentries created before the rollback
|
||||
* time must be invalidated. Otherwise they can obscure files which
|
||||
* are only present in the rolled back dataset.
|
||||
*/
|
||||
if (dentry->d_inode == NULL) {
|
||||
spin_lock(&dentry->d_lock);
|
||||
error = time_before(dentry->d_time, zfsvfs->z_rollback_time);
|
||||
spin_unlock(&dentry->d_lock);
|
||||
|
||||
if (error)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* The dentry may reference a stale inode if a mounted file system
|
||||
* was rolled back to a point in time where the object didn't exist.
|
||||
*/
|
||||
if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale)
|
||||
return (0);
|
||||
|
||||
return (1);
|
||||
}
|
||||
|
||||
const struct inode_operations zpl_inode_operations = {
|
||||
.setattr = zpl_setattr,
|
||||
.getattr = zpl_getattr,
|
||||
#ifdef HAVE_GENERIC_SETXATTR
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.listxattr = zpl_xattr_list,
|
||||
#ifdef HAVE_INODE_TRUNCATE_RANGE
|
||||
.truncate_range = zpl_truncate_range,
|
||||
#endif /* HAVE_INODE_TRUNCATE_RANGE */
|
||||
#ifdef HAVE_INODE_FALLOCATE
|
||||
.fallocate = zpl_fallocate,
|
||||
#endif /* HAVE_INODE_FALLOCATE */
|
||||
#if defined(CONFIG_FS_POSIX_ACL)
|
||||
#if defined(HAVE_SET_ACL)
|
||||
.set_acl = zpl_set_acl,
|
||||
#endif
|
||||
#if defined(HAVE_GET_ACL)
|
||||
.get_acl = zpl_get_acl,
|
||||
#elif defined(HAVE_CHECK_ACL)
|
||||
.check_acl = zpl_check_acl,
|
||||
#elif defined(HAVE_PERMISSION)
|
||||
.permission = zpl_permission,
|
||||
#endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */
|
||||
#endif /* CONFIG_FS_POSIX_ACL */
|
||||
};
|
||||
|
||||
const struct inode_operations zpl_dir_inode_operations = {
|
||||
.create = zpl_create,
|
||||
.lookup = zpl_lookup,
|
||||
.link = zpl_link,
|
||||
.unlink = zpl_unlink,
|
||||
.symlink = zpl_symlink,
|
||||
.mkdir = zpl_mkdir,
|
||||
.rmdir = zpl_rmdir,
|
||||
.mknod = zpl_mknod,
|
||||
#ifdef HAVE_RENAME_WANTS_FLAGS
|
||||
.rename = zpl_rename2,
|
||||
#else
|
||||
.rename = zpl_rename,
|
||||
#endif
|
||||
#ifdef HAVE_TMPFILE
|
||||
.tmpfile = zpl_tmpfile,
|
||||
#endif
|
||||
.setattr = zpl_setattr,
|
||||
.getattr = zpl_getattr,
|
||||
#ifdef HAVE_GENERIC_SETXATTR
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.listxattr = zpl_xattr_list,
|
||||
#if defined(CONFIG_FS_POSIX_ACL)
|
||||
#if defined(HAVE_SET_ACL)
|
||||
.set_acl = zpl_set_acl,
|
||||
#endif
|
||||
#if defined(HAVE_GET_ACL)
|
||||
.get_acl = zpl_get_acl,
|
||||
#elif defined(HAVE_CHECK_ACL)
|
||||
.check_acl = zpl_check_acl,
|
||||
#elif defined(HAVE_PERMISSION)
|
||||
.permission = zpl_permission,
|
||||
#endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */
|
||||
#endif /* CONFIG_FS_POSIX_ACL */
|
||||
};
|
||||
|
||||
const struct inode_operations zpl_symlink_inode_operations = {
|
||||
#ifdef HAVE_GENERIC_READLINK
|
||||
.readlink = generic_readlink,
|
||||
#endif
|
||||
#if defined(HAVE_GET_LINK_DELAYED) || defined(HAVE_GET_LINK_COOKIE)
|
||||
.get_link = zpl_get_link,
|
||||
#elif defined(HAVE_FOLLOW_LINK_COOKIE) || defined(HAVE_FOLLOW_LINK_NAMEIDATA)
|
||||
.follow_link = zpl_follow_link,
|
||||
#endif
|
||||
#if defined(HAVE_PUT_LINK_COOKIE) || defined(HAVE_PUT_LINK_NAMEIDATA)
|
||||
.put_link = zpl_put_link,
|
||||
#endif
|
||||
.setattr = zpl_setattr,
|
||||
.getattr = zpl_getattr,
|
||||
#ifdef HAVE_GENERIC_SETXATTR
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.listxattr = zpl_xattr_list,
|
||||
};
|
||||
|
||||
const struct inode_operations zpl_special_inode_operations = {
|
||||
.setattr = zpl_setattr,
|
||||
.getattr = zpl_getattr,
|
||||
#ifdef HAVE_GENERIC_SETXATTR
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.listxattr = zpl_xattr_list,
|
||||
#if defined(CONFIG_FS_POSIX_ACL)
|
||||
#if defined(HAVE_SET_ACL)
|
||||
.set_acl = zpl_set_acl,
|
||||
#endif
|
||||
#if defined(HAVE_GET_ACL)
|
||||
.get_acl = zpl_get_acl,
|
||||
#elif defined(HAVE_CHECK_ACL)
|
||||
.check_acl = zpl_check_acl,
|
||||
#elif defined(HAVE_PERMISSION)
|
||||
.permission = zpl_permission,
|
||||
#endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */
|
||||
#endif /* CONFIG_FS_POSIX_ACL */
|
||||
};
|
||||
|
||||
dentry_operations_t zpl_dentry_operations = {
|
||||
.d_revalidate = zpl_revalidate,
|
||||
};
|
||||
@@ -0,0 +1,426 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2011, Lawrence Livermore National Security, LLC.
|
||||
*/
|
||||
|
||||
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/zfs_vnops.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zfs_ctldir.h>
|
||||
#include <sys/zpl.h>
|
||||
|
||||
|
||||
static struct inode *
|
||||
zpl_inode_alloc(struct super_block *sb)
|
||||
{
|
||||
struct inode *ip;
|
||||
|
||||
VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0);
|
||||
inode_set_iversion(ip, 1);
|
||||
|
||||
return (ip);
|
||||
}
|
||||
|
||||
static void
|
||||
zpl_inode_destroy(struct inode *ip)
|
||||
{
|
||||
ASSERT(atomic_read(&ip->i_count) == 0);
|
||||
zfs_inode_destroy(ip);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from __mark_inode_dirty() to reflect that something in the
|
||||
* inode has changed. We use it to ensure the znode system attributes
|
||||
* are always strictly up to date with respect to the inode.
|
||||
*/
|
||||
#ifdef HAVE_DIRTY_INODE_WITH_FLAGS
|
||||
static void
|
||||
zpl_dirty_inode(struct inode *ip, int flags)
|
||||
{
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
zfs_dirty_inode(ip, flags);
|
||||
spl_fstrans_unmark(cookie);
|
||||
}
|
||||
#else
|
||||
static void
|
||||
zpl_dirty_inode(struct inode *ip)
|
||||
{
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
zfs_dirty_inode(ip, 0);
|
||||
spl_fstrans_unmark(cookie);
|
||||
}
|
||||
#endif /* HAVE_DIRTY_INODE_WITH_FLAGS */
|
||||
|
||||
/*
|
||||
* When ->drop_inode() is called its return value indicates if the
|
||||
* inode should be evicted from the inode cache. If the inode is
|
||||
* unhashed and has no links the default policy is to evict it
|
||||
* immediately.
|
||||
*
|
||||
* Prior to 2.6.36 this eviction was accomplished by the vfs calling
|
||||
* ->delete_inode(). It was ->delete_inode()'s responsibility to
|
||||
* truncate the inode pages and call clear_inode(). The call to
|
||||
* clear_inode() synchronously invalidates all the buffers and
|
||||
* calls ->clear_inode(). It was ->clear_inode()'s responsibility
|
||||
* to clean up any filesystem specific data before freeing the inode.
|
||||
*
|
||||
* This elaborate mechanism was replaced by ->evict_inode() which
|
||||
* does the job of both ->delete_inode() and ->clear_inode(). It
|
||||
* will be called exactly once, and when it returns the inode must
|
||||
* be in a state where it can simply be freed.
|
||||
*
|
||||
* The ->evict_inode() callback must minimally truncate the inode pages,
|
||||
* and call clear_inode(). For 2.6.35 and later kernels this will
|
||||
* simply update the inode state, with the sync occurring before the
|
||||
* truncate in evict(). For earlier kernels clear_inode() maps to
|
||||
* end_writeback() which is responsible for completing all outstanding
|
||||
* write back.  In either case, once this is done it is safe to clean up
* any remaining inode and filesystem specific data via zfs_inactive().
|
||||
*/
|
||||
#ifdef HAVE_EVICT_INODE
|
||||
static void
|
||||
zpl_evict_inode(struct inode *ip)
|
||||
{
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
truncate_setsize(ip, 0);
|
||||
clear_inode(ip);
|
||||
zfs_inactive(ip);
|
||||
spl_fstrans_unmark(cookie);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void
|
||||
zpl_drop_inode(struct inode *ip)
|
||||
{
|
||||
generic_delete_inode(ip);
|
||||
}
|
||||
|
||||
static void
|
||||
zpl_clear_inode(struct inode *ip)
|
||||
{
|
||||
fstrans_cookie_t cookie;
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
zfs_inactive(ip);
|
||||
spl_fstrans_unmark(cookie);
|
||||
}
|
||||
|
||||
static void
|
||||
zpl_inode_delete(struct inode *ip)
|
||||
{
|
||||
truncate_setsize(ip, 0);
|
||||
clear_inode(ip);
|
||||
}
|
||||
#endif /* HAVE_EVICT_INODE */
|
||||
|
||||
static void
|
||||
zpl_put_super(struct super_block *sb)
|
||||
{
|
||||
fstrans_cookie_t cookie;
|
||||
int error;
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_umount(sb);
|
||||
spl_fstrans_unmark(cookie);
|
||||
ASSERT3S(error, <=, 0);
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_sync_fs(struct super_block *sb, int wait)
|
||||
{
|
||||
fstrans_cookie_t cookie;
|
||||
cred_t *cr = CRED();
|
||||
int error;
|
||||
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_sync(sb, wait, cr);
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
|
||||
{
|
||||
fstrans_cookie_t cookie;
|
||||
int error;
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_statvfs(dentry, statp);
|
||||
spl_fstrans_unmark(cookie);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
/*
|
||||
* If required by a 32-bit system call, dynamically scale the
|
||||
* block size up to 16MiB and decrease the block counts. This
|
||||
* allows for a maximum size of 64EiB to be reported. The file
|
||||
* counts must be artificially capped at 2^32-1.
|
||||
*/
|
||||
if (unlikely(zpl_is_32bit_api())) {
|
||||
while (statp->f_blocks > UINT32_MAX &&
|
||||
statp->f_bsize < SPA_MAXBLOCKSIZE) {
|
||||
statp->f_frsize <<= 1;
|
||||
statp->f_bsize <<= 1;
|
||||
|
||||
statp->f_blocks >>= 1;
|
||||
statp->f_bfree >>= 1;
|
||||
statp->f_bavail >>= 1;
|
||||
}
|
||||
|
||||
uint64_t usedobjs = statp->f_files - statp->f_ffree;
|
||||
statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs);
|
||||
statp->f_files = statp->f_ffree + usedobjs;
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_remount_fs(struct super_block *sb, int *flags, char *data)
|
||||
{
|
||||
zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data };
|
||||
fstrans_cookie_t cookie;
|
||||
int error;
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_remount(sb, flags, &zm);
|
||||
spl_fstrans_unmark(cookie);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
__zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs)
|
||||
{
|
||||
seq_printf(seq, ",%s",
|
||||
zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr");
|
||||
|
||||
#ifdef CONFIG_FS_POSIX_ACL
|
||||
switch (zfsvfs->z_acl_type) {
|
||||
case ZFS_ACLTYPE_POSIXACL:
|
||||
seq_puts(seq, ",posixacl");
|
||||
break;
|
||||
default:
|
||||
seq_puts(seq, ",noacl");
|
||||
break;
|
||||
}
|
||||
#endif /* CONFIG_FS_POSIX_ACL */
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#ifdef HAVE_SHOW_OPTIONS_WITH_DENTRY
|
||||
static int
|
||||
zpl_show_options(struct seq_file *seq, struct dentry *root)
|
||||
{
|
||||
return (__zpl_show_options(seq, root->d_sb->s_fs_info));
|
||||
}
|
||||
#else
|
||||
static int
|
||||
zpl_show_options(struct seq_file *seq, struct vfsmount *vfsp)
|
||||
{
|
||||
return (__zpl_show_options(seq, vfsp->mnt_sb->s_fs_info));
|
||||
}
|
||||
#endif /* HAVE_SHOW_OPTIONS_WITH_DENTRY */
|
||||
|
||||
static int
|
||||
zpl_fill_super(struct super_block *sb, void *data, int silent)
|
||||
{
|
||||
zfs_mnt_t *zm = (zfs_mnt_t *)data;
|
||||
fstrans_cookie_t cookie;
|
||||
int error;
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
error = -zfs_domount(sb, zm, silent);
|
||||
spl_fstrans_unmark(cookie);
|
||||
ASSERT3S(error, <=, 0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_test_super(struct super_block *s, void *data)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = s->s_fs_info;
|
||||
objset_t *os = data;
|
||||
|
||||
if (zfsvfs == NULL)
|
||||
return (0);
|
||||
|
||||
return (os == zfsvfs->z_os);
|
||||
}
|
||||
|
||||
static struct super_block *
|
||||
zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
|
||||
{
|
||||
struct super_block *s;
|
||||
objset_t *os;
|
||||
int err;
|
||||
|
||||
err = dmu_objset_hold(zm->mnt_osname, FTAG, &os);
|
||||
if (err)
|
||||
return (ERR_PTR(-err));
|
||||
|
||||
/*
|
||||
* The dsl pool lock must be released prior to calling sget().
|
||||
* It is possible sget() may block on the lock in grab_super()
|
||||
* while deactivate_super() holds that same lock and waits for
|
||||
* a txg sync. If the dsl_pool lock is held over sget()
|
||||
* this can prevent the pool sync and cause a deadlock.
|
||||
*/
|
||||
dsl_pool_rele(dmu_objset_pool(os), FTAG);
|
||||
s = zpl_sget(fs_type, zpl_test_super, set_anon_super, flags, os);
|
||||
dsl_dataset_rele(dmu_objset_ds(os), FTAG);
|
||||
|
||||
if (IS_ERR(s))
|
||||
return (ERR_CAST(s));
|
||||
|
||||
if (s->s_root == NULL) {
|
||||
err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
|
||||
if (err) {
|
||||
deactivate_locked_super(s);
|
||||
return (ERR_PTR(err));
|
||||
}
|
||||
s->s_flags |= SB_ACTIVE;
|
||||
} else if ((flags ^ s->s_flags) & SB_RDONLY) {
|
||||
deactivate_locked_super(s);
|
||||
return (ERR_PTR(-EBUSY));
|
||||
}
|
||||
|
||||
return (s);
|
||||
}
|
||||
|
||||
#ifdef HAVE_FST_MOUNT
|
||||
static struct dentry *
|
||||
zpl_mount(struct file_system_type *fs_type, int flags,
|
||||
const char *osname, void *data)
|
||||
{
|
||||
zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
|
||||
|
||||
struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm);
|
||||
if (IS_ERR(sb))
|
||||
return (ERR_CAST(sb));
|
||||
|
||||
return (dget(sb->s_root));
|
||||
}
|
||||
#else
|
||||
static int
|
||||
zpl_get_sb(struct file_system_type *fs_type, int flags,
|
||||
const char *osname, void *data, struct vfsmount *mnt)
|
||||
{
|
||||
zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
|
||||
|
||||
struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm);
|
||||
if (IS_ERR(sb))
|
||||
return (PTR_ERR(sb));
|
||||
|
||||
(void) simple_set_mnt(mnt, sb);
|
||||
|
||||
return (0);
|
||||
}
|
||||
#endif /* HAVE_FST_MOUNT */
|
||||
|
||||
/*
 * ->kill_sb() callback: run the ZFS pre-unmount step and then let
 * kill_anon_super() tear the super block down.
 */
static void
zpl_kill_sb(struct super_block *sb)
{
	zfs_preumount(sb);
	kill_anon_super(sb);

#ifdef HAVE_S_INSTANCES_LIST_HEAD
	/*
	 * Re-point the s_instances link at the file system type's
	 * fs_supers list head after the teardown.
	 * NOTE(review): the reason this is needed is not visible here --
	 * confirm against the kernel versions that define
	 * HAVE_S_INSTANCES_LIST_HEAD.
	 */
	sb->s_instances.next = &(zpl_fs_type.fs_supers);
#endif /* HAVE_S_INSTANCES_LIST_HEAD */
}
|
||||
|
||||
void
|
||||
zpl_prune_sb(int64_t nr_to_scan, void *arg)
|
||||
{
|
||||
struct super_block *sb = (struct super_block *)arg;
|
||||
int objects = 0;
|
||||
|
||||
(void) -zfs_prune(sb, nr_to_scan, &objects);
|
||||
}
|
||||
|
||||
#ifdef HAVE_NR_CACHED_OBJECTS
/*
 * ->nr_cached_objects() callback: always reports 0 cached objects for
 * 'sb'.
 */
static int
zpl_nr_cached_objects(struct super_block *sb)
{
	return (0);
}
#endif /* HAVE_NR_CACHED_OBJECTS */
|
||||
|
||||
#ifdef HAVE_FREE_CACHED_OBJECTS
/*
 * ->free_cached_objects() callback: intentionally does nothing; both
 * arguments are ignored.
 */
static void
zpl_free_cached_objects(struct super_block *sb, int nr_to_scan)
{
	/* noop */
}
#endif /* HAVE_FREE_CACHED_OBJECTS */
|
||||
|
||||
const struct super_operations zpl_super_operations = {
|
||||
.alloc_inode = zpl_inode_alloc,
|
||||
.destroy_inode = zpl_inode_destroy,
|
||||
.dirty_inode = zpl_dirty_inode,
|
||||
.write_inode = NULL,
|
||||
#ifdef HAVE_EVICT_INODE
|
||||
.evict_inode = zpl_evict_inode,
|
||||
#else
|
||||
.drop_inode = zpl_drop_inode,
|
||||
.clear_inode = zpl_clear_inode,
|
||||
.delete_inode = zpl_inode_delete,
|
||||
#endif /* HAVE_EVICT_INODE */
|
||||
.put_super = zpl_put_super,
|
||||
.sync_fs = zpl_sync_fs,
|
||||
.statfs = zpl_statfs,
|
||||
.remount_fs = zpl_remount_fs,
|
||||
.show_options = zpl_show_options,
|
||||
.show_stats = NULL,
|
||||
#ifdef HAVE_NR_CACHED_OBJECTS
|
||||
.nr_cached_objects = zpl_nr_cached_objects,
|
||||
#endif /* HAVE_NR_CACHED_OBJECTS */
|
||||
#ifdef HAVE_FREE_CACHED_OBJECTS
|
||||
.free_cached_objects = zpl_free_cached_objects,
|
||||
#endif /* HAVE_FREE_CACHED_OBJECTS */
|
||||
};
|
||||
|
||||
struct file_system_type zpl_fs_type = {
|
||||
.owner = THIS_MODULE,
|
||||
.name = ZFS_DRIVER,
|
||||
#ifdef HAVE_FST_MOUNT
|
||||
.mount = zpl_mount,
|
||||
#else
|
||||
.get_sb = zpl_get_sb,
|
||||
#endif /* HAVE_FST_MOUNT */
|
||||
.kill_sb = zpl_kill_sb,
|
||||
};
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user