mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-15 04:30:33 +03:00
c3f2f1aa2d
Simplify vdev probes in the zio_vdev_io_done context to avoid holding the spa config lock for a long duration. Also allow zpool clear if no evidence of another host is using the pool. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Olaf Faaland <faaland1@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Don Brady <don.brady@klarasystems.com> Closes #15839
181 lines
6.3 KiB
C
181 lines
6.3 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or https://opensource.org/licenses/CDDL-1.0.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
* Copyright (c) 2016, 2017 by Delphix. All rights reserved.
|
|
*/
|
|
|
|
#ifndef _SYS_UBERBLOCK_IMPL_H
|
|
#define _SYS_UBERBLOCK_IMPL_H
|
|
|
|
#include <sys/uberblock.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/*
|
|
* The uberblock version is incremented whenever an incompatible on-disk
|
|
* format change is made to the SPA, DMU, or ZAP.
|
|
*
|
|
* Note: the first two fields should never be moved. When a storage pool
|
|
* is opened, the uberblock must be read off the disk before the version
|
|
* can be checked. If the ub_version field is moved, we may not detect
|
|
* version mismatch. If the ub_magic field is moved, applications that
|
|
* expect the magic number in the first word won't work.
|
|
*/
|
|
#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
|
|
#define UBERBLOCK_SHIFT 10 /* up to 1K */
|
|
#define MMP_MAGIC 0xa11cea11 /* all-see-all */
|
|
|
|
#define MMP_INTERVAL_VALID_BIT 0x01
|
|
#define MMP_SEQ_VALID_BIT 0x02
|
|
#define MMP_FAIL_INT_VALID_BIT 0x04
|
|
|
|
#define MMP_VALID(ubp) ((ubp)->ub_magic == UBERBLOCK_MAGIC && \
|
|
(ubp)->ub_mmp_magic == MMP_MAGIC)
|
|
#define MMP_INTERVAL_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
|
|
MMP_INTERVAL_VALID_BIT))
|
|
#define MMP_SEQ_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
|
|
MMP_SEQ_VALID_BIT))
|
|
#define MMP_FAIL_INT_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
|
|
MMP_FAIL_INT_VALID_BIT))
|
|
|
|
#define MMP_INTERVAL(ubp) (((ubp)->ub_mmp_config & 0x00000000FFFFFF00) \
|
|
>> 8)
|
|
#define MMP_SEQ(ubp) (((ubp)->ub_mmp_config & 0x0000FFFF00000000) \
|
|
>> 32)
|
|
#define MMP_FAIL_INT(ubp) (((ubp)->ub_mmp_config & 0xFFFF000000000000) \
|
|
>> 48)
|
|
|
|
#define MMP_INTERVAL_SET(write) \
|
|
(((uint64_t)(write & 0xFFFFFF) << 8) | MMP_INTERVAL_VALID_BIT)
|
|
|
|
#define MMP_SEQ_SET(seq) \
|
|
(((uint64_t)(seq & 0xFFFF) << 32) | MMP_SEQ_VALID_BIT)
|
|
|
|
#define MMP_FAIL_INT_SET(fail) \
|
|
(((uint64_t)(fail & 0xFFFF) << 48) | MMP_FAIL_INT_VALID_BIT)
|
|
|
|
/*
|
|
* RAIDZ expansion reflow information.
|
|
*
|
|
* 64 56 48 40 32 24 16 8 0
|
|
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
|
* |Scratch | Reflow |
|
|
* | State | Offset |
|
|
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
|
*/
|
|
typedef enum raidz_reflow_scratch_state {
|
|
RRSS_SCRATCH_NOT_IN_USE = 0,
|
|
RRSS_SCRATCH_VALID,
|
|
RRSS_SCRATCH_INVALID_SYNCED,
|
|
RRSS_SCRATCH_INVALID_SYNCED_ON_IMPORT,
|
|
RRSS_SCRATCH_INVALID_SYNCED_REFLOW
|
|
} raidz_reflow_scratch_state_t;
|
|
|
|
#define RRSS_GET_OFFSET(ub) \
|
|
BF64_GET_SB((ub)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0)
|
|
#define RRSS_SET_OFFSET(ub, x) \
|
|
BF64_SET_SB((ub)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0, x)
|
|
|
|
#define RRSS_GET_STATE(ub) \
|
|
BF64_GET((ub)->ub_raidz_reflow_info, 55, 9)
|
|
#define RRSS_SET_STATE(ub, x) \
|
|
BF64_SET((ub)->ub_raidz_reflow_info, 55, 9, x)
|
|
|
|
#define RAIDZ_REFLOW_SET(ub, state, offset) do { \
|
|
(ub)->ub_raidz_reflow_info = 0; \
|
|
RRSS_SET_OFFSET(ub, offset); \
|
|
RRSS_SET_STATE(ub, state); \
|
|
} while (0)
|
|
|
|
struct uberblock {
|
|
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
|
|
uint64_t ub_version; /* SPA_VERSION */
|
|
uint64_t ub_txg; /* txg of last sync */
|
|
uint64_t ub_guid_sum; /* sum of all vdev guids */
|
|
uint64_t ub_timestamp; /* UTC time of last sync */
|
|
blkptr_t ub_rootbp; /* MOS objset_phys_t */
|
|
|
|
/* highest SPA_VERSION supported by software that wrote this txg */
|
|
uint64_t ub_software_version;
|
|
|
|
/* Maybe missing in uberblocks we read, but always written */
|
|
uint64_t ub_mmp_magic; /* MMP_MAGIC */
|
|
/*
|
|
* If ub_mmp_delay == 0 and ub_mmp_magic is valid, MMP is off.
|
|
* Otherwise, nanosec since last MMP write.
|
|
*/
|
|
uint64_t ub_mmp_delay;
|
|
|
|
/*
|
|
* The ub_mmp_config contains the multihost write interval, multihost
|
|
* fail intervals, sequence number for sub-second granularity, and
|
|
* valid bit mask. This layout is as follows:
|
|
*
|
|
* 64 56 48 40 32 24 16 8 0
|
|
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
|
* 0 | Fail Intervals| Seq | Write Interval (ms) | VALID |
|
|
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
|
*
|
|
* This allows a write_interval of (2^24/1000)s, over 4.5 hours
|
|
*
|
|
* VALID Bits:
|
|
* - 0x01 - Write Interval (ms)
|
|
* - 0x02 - Sequence number exists
|
|
* - 0x04 - Fail Intervals
|
|
* - 0xf8 - Reserved
|
|
*/
|
|
uint64_t ub_mmp_config;
|
|
|
|
/*
|
|
* ub_checkpoint_txg indicates two things about the current uberblock:
|
|
*
|
|
* 1] If it is not zero then this uberblock is a checkpoint. If it is
|
|
* zero, then this uberblock is not a checkpoint.
|
|
*
|
|
* 2] On checkpointed uberblocks, the value of ub_checkpoint_txg is
|
|
* the ub_txg that the uberblock had at the time we moved it to
|
|
* the MOS config.
|
|
*
|
|
* The field is set when we checkpoint the uberblock and continues to
|
|
* hold that value even after we've rewound (unlike the ub_txg that
|
|
* is reset to a higher value).
|
|
*
|
|
* Besides checks used to determine whether we are reopening the
|
|
* pool from a checkpointed uberblock [see spa_ld_select_uberblock()],
|
|
* the value of the field is used to determine which ZIL blocks have
|
|
* been allocated according to the ms_sm when we are rewinding to a
|
|
* checkpoint. Specifically, if logical birth > ub_checkpoint_txg,then
|
|
* the ZIL block is not allocated [see uses of spa_min_claim_txg()].
|
|
*/
|
|
uint64_t ub_checkpoint_txg;
|
|
|
|
uint64_t ub_raidz_reflow_info;
|
|
};
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _SYS_UBERBLOCK_IMPL_H */
|