mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
mmp: claim sequence id before final import
As part of SPA_LOAD_IMPORT add an additional activity check to
detect simultaneous imports from different hosts. This check is
only required when the timing is such that there's no activity
for the read-only tryimport check to detect. This extra
safety check operates as follows:
1. Repeats the following MMP check 10 times:
a. Write out an MMP uberblock with the best txg and a random
sequence id to all primary pool vdevs.
b. Verify a minimum number of good writes such that even if
the pool appears degraded on the remote host it will see
at least one of the updated MMP uberblocks.
c. Wait for the MMP interval; this leaves a window for other
racing hosts to make similar modifications which can be
detected.
d. Call vdev_uberblock_load() to determine the best uberblock
to use, this should be the MMP uberblock just written.
e. Verify the txg and random sequence number match the MMP
uberblock written in 1a.
2. Restore the original MMP uberblocks. This allows the check
to be performed again if the pool fails to import for an
unrelated reason.
This change also includes some refactoring and minor improvements.
- Never try loading earlier txgs during import when the import
fails with EREMOTEIO or EINTR. These errors don't indicate
the txg is damaged but instead that it's either in use on a
remote host or the import was interactively cancelled. No
rewind is also performed for EBADF which can result from a
stale trusted config when doing a verbatim import.
- Refactor the code for consistent logging of the multihost
activity check using spa_load_note() and console messages
indicating when the activity check was triggered and the result.
- Added MMP_*_MASK and MMP_SEQ_CLEAR() macros to allow easier
modification of the sequence number in an uberblock.
- Added ZFS_LOAD_INFO_DEBUG environment variable which can be
set to dump to stdout the spa_load_info nvlist returned
during import. This is used by the updated mmp test cases
to determine if an activity check was run and its result.
- Standardize the mmp messages similarly to make it easier to
find all the relevant mmp lines in the debug log.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Olaf Faaland <faaland1@llnl.gov>
Reviewed-by: Akash B <akash-b@hpe.com>
This commit is contained in:
committed by
Tony Hutter
parent
328a823848
commit
a65bb7c518
@@ -863,6 +863,10 @@ typedef struct zpool_load_policy {
|
||||
#define ZPOOL_CONFIG_MMP_SEQ "mmp_seq" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_MMP_HOSTNAME "mmp_hostname" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_MMP_HOSTID "mmp_hostid" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_MMP_RESULT "mmp_result" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_MMP_TRYIMPORT_NS "mmp_tryimport_ns" /* not stored */
|
||||
#define ZPOOL_CONFIG_MMP_IMPORT_NS "mmp_import_ns" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_MMP_CLAIM_NS "mmp_claim_ns" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_ALLOCATION_BIAS "alloc_bias" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_EXPANSION_TIME "expansion_time" /* not stored */
|
||||
#define ZPOOL_CONFIG_REBUILD_STATS "org.openzfs:rebuild_stats"
|
||||
|
||||
@@ -33,6 +33,7 @@ extern "C" {
|
||||
#define MMP_DEFAULT_IMPORT_INTERVALS 20
|
||||
#define MMP_DEFAULT_FAIL_INTERVALS 10
|
||||
#define MMP_MIN_FAIL_INTERVALS 2 /* min if != 0 */
|
||||
#define MMP_IMPORT_VERIFY_ITERS 10
|
||||
#define MMP_IMPORT_SAFETY_FACTOR 200 /* pct */
|
||||
#define MMP_INTERVAL_OK(interval) MAX(interval, MMP_MIN_INTERVAL)
|
||||
#define MMP_FAIL_INTVS_OK(fails) (fails == 0 ? 0 : MAX(fails, \
|
||||
@@ -53,6 +54,9 @@ typedef struct mmp_thread {
|
||||
vdev_t *mmp_last_leaf; /* last mmp write sent here */
|
||||
uint64_t mmp_leaf_last_gen; /* last mmp write sent here */
|
||||
uint32_t mmp_seq; /* intra-second update counter */
|
||||
uint64_t mmp_tryimport_ns; /* tryimport activity check time */
|
||||
uint64_t mmp_import_ns; /* import activity check time */
|
||||
uint64_t mmp_claim_ns; /* claim activity check time */
|
||||
} mmp_thread_t;
|
||||
|
||||
|
||||
@@ -62,6 +66,7 @@ extern void mmp_thread_start(struct spa *spa);
|
||||
extern void mmp_thread_stop(struct spa *spa);
|
||||
extern void mmp_update_uberblock(struct spa *spa, struct uberblock *ub);
|
||||
extern void mmp_signal_all_threads(void);
|
||||
extern int mmp_claim_uberblock(spa_t *spa, vdev_t *vd, uberblock_t *ub);
|
||||
|
||||
/* Global tuning */
|
||||
extern int param_set_multihost_interval(ZFS_MODULE_PARAM_ARGS);
|
||||
|
||||
@@ -304,6 +304,7 @@ struct spa {
|
||||
void *spa_cksum_tmpls[ZIO_CHECKSUM_FUNCTIONS];
|
||||
uberblock_t spa_ubsync; /* last synced uberblock */
|
||||
uberblock_t spa_uberblock; /* current uberblock */
|
||||
boolean_t spa_activity_check; /* activity check required */
|
||||
boolean_t spa_extreme_rewind; /* rewind past deferred frees */
|
||||
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
|
||||
uint64_t spa_scrub_inflight; /* in-flight scrub bytes */
|
||||
|
||||
@@ -51,6 +51,12 @@ extern "C" {
|
||||
#define MMP_SEQ_VALID_BIT 0x02
|
||||
#define MMP_FAIL_INT_VALID_BIT 0x04
|
||||
|
||||
#define MMP_INTERVAL_MASK 0x00000000FFFFFF00
|
||||
#define MMP_SEQ_MASK 0x0000FFFF00000000
|
||||
#define MMP_FAIL_INT_MASK 0xFFFF000000000000
|
||||
|
||||
#define MMP_SEQ_MAX UINT16_MAX
|
||||
|
||||
#define MMP_VALID(ubp) ((ubp)->ub_magic == UBERBLOCK_MAGIC && \
|
||||
(ubp)->ub_mmp_magic == MMP_MAGIC)
|
||||
#define MMP_INTERVAL_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
|
||||
@@ -60,21 +66,25 @@ extern "C" {
|
||||
#define MMP_FAIL_INT_VALID(ubp) (MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
|
||||
MMP_FAIL_INT_VALID_BIT))
|
||||
|
||||
#define MMP_INTERVAL(ubp) (((ubp)->ub_mmp_config & 0x00000000FFFFFF00) \
|
||||
#define MMP_INTERVAL(ubp) (((ubp)->ub_mmp_config & MMP_INTERVAL_MASK) \
|
||||
>> 8)
|
||||
#define MMP_SEQ(ubp) (((ubp)->ub_mmp_config & 0x0000FFFF00000000) \
|
||||
#define MMP_SEQ(ubp) (((ubp)->ub_mmp_config & MMP_SEQ_MASK) \
|
||||
>> 32)
|
||||
#define MMP_FAIL_INT(ubp) (((ubp)->ub_mmp_config & 0xFFFF000000000000) \
|
||||
#define MMP_FAIL_INT(ubp) (((ubp)->ub_mmp_config & MMP_FAIL_INT_MASK) \
|
||||
>> 48)
|
||||
|
||||
#define MMP_INTERVAL_SET(write) \
|
||||
(((uint64_t)(write & 0xFFFFFF) << 8) | MMP_INTERVAL_VALID_BIT)
|
||||
(((uint64_t)((write) & 0xFFFFFF) << 8) | MMP_INTERVAL_VALID_BIT)
|
||||
|
||||
#define MMP_SEQ_SET(seq) \
|
||||
(((uint64_t)(seq & 0xFFFF) << 32) | MMP_SEQ_VALID_BIT)
|
||||
(((uint64_t)((seq) & 0xFFFF) << 32) | MMP_SEQ_VALID_BIT)
|
||||
|
||||
#define MMP_FAIL_INT_SET(fail) \
|
||||
(((uint64_t)(fail & 0xFFFF) << 48) | MMP_FAIL_INT_VALID_BIT)
|
||||
(((uint64_t)((fail) & 0xFFFF) << 48) | MMP_FAIL_INT_VALID_BIT)
|
||||
|
||||
|
||||
#define MMP_SEQ_CLEAR(ubp) \
|
||||
((ubp)->ub_mmp_config &= ~(MMP_SEQ_MASK | MMP_SEQ_VALID_BIT))
|
||||
|
||||
/*
|
||||
* RAIDZ expansion reflow information.
|
||||
|
||||
@@ -212,6 +212,8 @@ extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t
|
||||
extern int vdev_label_read_bootenv(vdev_t *, nvlist_t *);
|
||||
extern int vdev_label_write_bootenv(vdev_t *, nvlist_t *);
|
||||
extern int vdev_uberblock_sync_list(vdev_t **, int, struct uberblock *, int);
|
||||
extern int vdev_uberblock_compare(const struct uberblock *,
|
||||
const struct uberblock *);
|
||||
extern int vdev_check_boot_reserve(spa_t *, vdev_t *);
|
||||
|
||||
typedef enum {
|
||||
|
||||
Reference in New Issue
Block a user