mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-12 19:20:28 +03:00
361a7e8211
As such, there are no specific synchronous semantics defined for
the xattrs. But for xattr=on, it does log to ZIL and zil_commit() is
done, if sync=always is set on dataset. This provides sync semantics
for xattr=on with sync=always set on dataset.
For the xattr=sa implementation, nothing is logged to the ZIL, so even with
sync=always, xattrs are not guaranteed to be synced before the xattr call
returns to the caller. An xattr can therefore be lost if the system crashes
before the txg carrying the xattr transaction is synced.
This change adds xattr=sa logging to ZIL on xattr create/remove/update
and xattrs are synced to ZIL (zil_commit() done) for sync=always.
This makes xattr=sa behavior similar to xattr=on.
Implementation notes:
The actual logging is fairly straight-forward and does not warrant
additional explanation.
However, it has been 14 years since we last added new TX types
to the ZIL [1], hence this is the first time we do it after the
introduction of zpool features. Therefore, here is an overview of the
feature activation and deactivation workflow:
1. The feature must be enabled. Otherwise, we don't log the new
record type. This ensures compatibility with older software.
2. The feature is activated per-dataset, since the ZIL is per-dataset.
3. If the feature is enabled and dataset is not for zvol, any append to
the ZIL chain will activate the feature for the dataset. Likewise
for starting a new ZIL chain.
4. A dataset that doesn't have a ZIL chain has the feature deactivated.
We ensure (3) by activating on the first zil_commit() after the feature
was enabled. Since activating the features requires waiting for txg
sync, the first zil_commit() after enabling the feature will be slower
than usual. The downside is that this is really a conservative
approximation: even if we never append a 'TX_SETSAXATTR' to the ZIL
chain, we pay the penalty for feature activation. The upside is that the
user is in control of when we pay the penalty, i.e., upon enabling the
feature.
We ensure (4) by hooking into zil_sync(), where ZIL destroy actually
happens.
One more piece on feature activation, since it's spread across
multiple functions:
zil_commit()
  zil_process_commit_list()
    if lwb == NULL // first zil_commit since zil_open
      zil_create()
        if no log block pointer in ZIL header:
          if feature enabled and not active:
            // CASE 1
            activate, COALESCE txg wait with dmu_tx that allocated the
            log block
        else // log block was allocated earlier than this zil_open
          if feature enabled and not active:
            // CASE 2
            activate, EXPLICIT txg wait
    else // already have an in-DRAM LWB
      if feature enabled and not active:
        // this happens when we enable the feature after zil_create
        // CASE 3
        activate, EXPLICIT txg wait
[1] da6c28aaf6
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Christian Schwarz <christian.schwarz@nutanix.com>
Reviewed-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Reviewed-by: Ryan Moeller <freqlabs@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Jitendra Patidar <jitendra.patidar@nutanix.com>
Closes #8768
Closes #9078
153 lines
4.4 KiB
C
153 lines
4.4 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
|
* Use is subject to license terms.
|
|
*/
|
|
|
|
#ifndef _SYS_ZFS_SA_H
|
|
#define _SYS_ZFS_SA_H
|
|
|
|
#ifdef _KERNEL
|
|
#include <sys/types32.h>
|
|
#include <sys/list.h>
|
|
#include <sys/dmu.h>
|
|
#include <sys/zfs_acl.h>
|
|
#include <sys/zfs_znode.h>
|
|
#include <sys/sa.h>
|
|
#include <sys/zil.h>
|
|
|
|
|
|
#endif
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/*
|
|
* This is the list of known attributes
|
|
* to the ZPL. The values of the actual
|
|
* attributes are not defined by the order
|
|
* of the enums. They are controlled by the attribute
|
|
* registration mechanism. Two different file systems
|
|
* could have different numeric values for the same
|
|
* attributes. This list is only used for dereferencing
|
|
* into the table that will hold the actual numeric value.
|
|
*/
|
|
typedef enum zpl_attr {
/* No explicit values: enumerators number sequentially from 0 (ZPL_ATIME). */
|
|
ZPL_ATIME,
|
|
ZPL_MTIME,
|
|
ZPL_CTIME,
|
|
ZPL_CRTIME,
|
|
ZPL_GEN,
|
|
ZPL_MODE,
|
|
ZPL_SIZE,
|
|
ZPL_PARENT,
|
|
ZPL_LINKS,
|
|
ZPL_XATTR,
|
|
ZPL_RDEV,
|
|
ZPL_FLAGS,
|
|
ZPL_UID,
|
|
ZPL_GID,
|
|
ZPL_PAD,
|
|
ZPL_ZNODE_ACL,
|
|
ZPL_DACL_COUNT,
|
|
ZPL_SYMLINK,
|
|
ZPL_SCANSTAMP,
|
|
ZPL_DACL_ACES,
|
|
ZPL_DXATTR,
|
|
ZPL_PROJID,
|
|
ZPL_END /* last enumerator; zfs_attr_table is sized ZPL_END + 1 */
|
|
} zpl_attr_t;
|
|
|
|
/* 0x108 == 264, the byte size documented for znode_phys_t in this header. */
#define ZFS_OLD_ZNODE_PHYS_SIZE 0x108
|
|
/* The old znode size minus sizeof (zfs_acl_phys_t). */
#define ZFS_SA_BASE_ATTR_SIZE (ZFS_OLD_ZNODE_PHYS_SIZE - \
|
|
sizeof (zfs_acl_phys_t))
|
|
|
|
/*
 * NOTE(review): presumably byte offsets of the named attributes within an
 * SA layout; the users are not visible in this header -- confirm.
 */
#define SA_MODE_OFFSET 0
|
|
#define SA_SIZE_OFFSET 8
|
|
#define SA_GEN_OFFSET 16
|
|
#define SA_UID_OFFSET 24
|
|
#define SA_GID_OFFSET 32
|
|
#define SA_PARENT_OFFSET 40
|
|
#define SA_FLAGS_OFFSET 48
|
|
#define SA_PROJID_OFFSET 128
|
|
|
|
/*
 * Table of sa_attr_reg_t entries declared with ZPL_END + 1 elements, so it
 * can be indexed by every zpl_attr_t value up to and including ZPL_END.
 * Only declared here; the definition is not in this header.
 */
extern const sa_attr_reg_t zfs_attr_table[ZPL_END + 1];
|
|
|
|
/*
|
|
* This is a deprecated data structure that only exists for
|
|
* dealing with file systems created prior to ZPL version 5.
|
|
*/
|
|
typedef struct znode_phys {
/* The number leading each field comment is that field's byte offset. */
|
|
uint64_t zp_atime[2]; /* 0 - last file access time */
|
|
uint64_t zp_mtime[2]; /* 16 - last file modification time */
|
|
uint64_t zp_ctime[2]; /* 32 - last file change time */
|
|
uint64_t zp_crtime[2]; /* 48 - creation time */
|
|
uint64_t zp_gen; /* 64 - generation (txg of creation) */
|
|
uint64_t zp_mode; /* 72 - file mode bits */
|
|
uint64_t zp_size; /* 80 - size of file */
|
|
uint64_t zp_parent; /* 88 - directory parent (`..') */
|
|
uint64_t zp_links; /* 96 - number of links to file */
|
|
uint64_t zp_xattr; /* 104 - DMU object for xattrs */
|
|
uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */
|
|
uint64_t zp_flags; /* 120 - persistent flags */
|
|
uint64_t zp_uid; /* 128 - file owner */
|
|
uint64_t zp_gid; /* 136 - owning group */
|
|
uint64_t zp_zap; /* 144 - extra attributes */
|
|
uint64_t zp_pad[3]; /* 152 - future */
|
|
zfs_acl_phys_t zp_acl; /* 176 - 263 ACL */
|
|
/*
|
|
* Data may pad out any remaining bytes in the znode buffer, e.g.:
|
|
*
|
|
* |<---------------------- dnode_phys (512) ------------------------>|
|
|
* |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
|
|
* |<---- znode (264) ---->|<---- data (56) ---->|
|
|
*
|
|
* At present, we use this space for the following:
|
|
* - symbolic links
|
|
* - 32-byte anti-virus scanstamp (regular files only)
|
|
*/
|
|
} znode_phys_t;
|
|
|
|
#ifdef _KERNEL
|
|
|
|
/*
 * 32768 == 32 KiB. NOTE(review): presumably the largest single xattr entry
 * accepted for SA storage -- confirm against the users of this macro.
 */
#define DXATTR_MAX_ENTRY_SIZE (32768)
|
|
/* Half of SPA_OLD_MAXBLOCKSIZE (right shift by one). */
#define DXATTR_MAX_SA_SIZE (SPA_OLD_MAXBLOCKSIZE >> 1)
|
|
|
|
/*
 * Kernel-only zfs_sa_* entry points. Parameters are unnamed in most of
 * these prototypes and the definitions are not visible from this header,
 * so the notes below are assumptions to confirm against the definitions.
 */
/* NOTE(review): presumably reads a symlink target into the uio -- confirm. */
int zfs_sa_readlink(struct znode *, zfs_uio_t *);
|
|
/* NOTE(review): presumably stores `link` (`len` bytes) under the given tx. */
void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *);
|
|
/* NOTE(review): scanstamp getter/setter operating on an xvattr_t -- confirm. */
void zfs_sa_get_scanstamp(struct znode *, xvattr_t *);
|
|
void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *);
|
|
/* Returns int; NOTE(review): presumably an error code -- confirm. */
int zfs_sa_get_xattr(struct znode *);
|
|
/*
 * NOTE(review): arguments are presumably (znode, xattr name, value buffer,
 * value size) -- confirm against the definition.
 */
int zfs_sa_set_xattr(struct znode *, const char *, const void *, size_t);
|
|
void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *);
|
|
void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *);
|
|
/* Init/fini pair taking no arguments and returning nothing. */
void zfs_sa_init(void);
|
|
void zfs_sa_fini(void);
|
|
#endif
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _SYS_ZFS_SA_H */
|