mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-03-11 04:46:18 +03:00
As part of SPA_LOAD_IMPORT add an additional activity check to
detect simultaneous imports from different hosts. This check is
only required when the timing is such that there's no activity
for the read-only tryimport check to detect. This extra
safety check operates as follows:
1. Repeats the following MMP check 10 times:
a. Write out an MMP uberblock with the best txg and a random
sequence id to all primary pool vdevs.
b. Verify a minimum number of good writes such that even if
the pool appears degraded on the remote host it will see
at least one of the updated MMP uberblocks.
c. Wait for the MMP interval; this leaves a window for other
racing hosts to make similar modifications which can be
detected.
d. Call vdev_uberblock_load() to determine the best uberblock
to use, this should be the MMP uberblock just written.
e. Verify the txg and random sequence number match the MMP
uberblock written in 1a.
2. Restore the original MMP uberblocks. This allows the check
to be performed again if the pool fails to import for an
unrelated reason.
This change also includes some refactoring and minor improvements.
- Never try loading earlier txgs during import when the import
fails with EREMOTEIO or EINTR. These errors don't indicate
the txg is damaged but instead that it's either in use on a
remote host or the import was interactively cancelled. No
rewind is also performed for EBADF which can result from a
stale trusted config when doing a verbatim import.
- Refactor the code for consistent logging of the multihost
activity check using spa_load_note() and console messages
indicating when the activity check was triggered and the result.
- Added MMP_*_MASK and MMP_SEQ_CLEAR() macros to allow easier
modification of the sequence number in an uberblock.
- Added ZFS_LOAD_INFO_DEBUG environment variable which can be
set to dump to stdout the spa_load_info nvlist returned
during import. This is used by the updated mmp test cases
to determine if an activity check was run and its result.
- Standardize the mmp messages similarly to make it easier to
find all the relevant mmp lines in the debug log.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Olaf Faaland <faaland1@llnl.gov>
Reviewed-by: Akash B <akash-b@hpe.com>
82 lines
2.7 KiB
C
82 lines
2.7 KiB
C
// SPDX-License-Identifier: CDDL-1.0
|
|
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* This file and its contents are supplied under the terms of the
|
|
* Common Development and Distribution License ("CDDL"), version 1.0.
|
|
* You may only use this file in accordance with the terms of version
|
|
* 1.0 of the CDDL.
|
|
*
|
|
* A full copy of the text of the CDDL should have accompanied this
|
|
* source. A copy of the CDDL is also available via the Internet at
|
|
* http://www.illumos.org/license/CDDL.
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright (C) 2017 by Lawrence Livermore National Security, LLC.
|
|
*/
|
|
|
|
#ifndef _SYS_MMP_H
|
|
#define _SYS_MMP_H
|
|
|
|
#include <sys/spa.h>
|
|
#include <sys/zfs_context.h>
|
|
#include <sys/uberblock_impl.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/*
 * MMP (multihost) limits and defaults.  Units, where the original header
 * states them, are kept in the trailing comments.
 */
#define MMP_MIN_INTERVAL		100	/* ms */
#define MMP_DEFAULT_INTERVAL		1000	/* ms */
#define MMP_DEFAULT_IMPORT_INTERVALS	20
#define MMP_DEFAULT_FAIL_INTERVALS	10
#define MMP_MIN_FAIL_INTERVALS		2	/* min if != 0 */
#define MMP_IMPORT_VERIFY_ITERS		10
#define MMP_IMPORT_SAFETY_FACTOR	200	/* pct */

/*
 * MMP_INTERVAL_OK(interval)
 *	Yields interval, raised to MMP_MIN_INTERVAL when it is smaller.
 *
 * MMP_FAIL_INTVS_OK(fails)
 *	Yields 0 when fails is 0; otherwise yields fails, raised to
 *	MMP_MIN_FAIL_INTERVALS when it is smaller.
 *
 * Both rely on MAX() being provided by an included header.  Every use of
 * the argument is parenthesized so that an expression argument (for
 * example a conditional expression) is evaluated as a unit.  The argument
 * may still be evaluated more than once, so do not pass expressions with
 * side effects.
 */
#define MMP_INTERVAL_OK(interval)	(MAX((interval), MMP_MIN_INTERVAL))
#define MMP_FAIL_INTVS_OK(fails)	((fails) == 0 ? 0 : \
	MAX((fails), MMP_MIN_FAIL_INTERVALS))
|
|
|
|
typedef struct mmp_thread {
|
|
kmutex_t mmp_thread_lock; /* protect thread mgmt fields */
|
|
kcondvar_t mmp_thread_cv;
|
|
kthread_t *mmp_thread;
|
|
uint8_t mmp_thread_exiting;
|
|
kmutex_t mmp_io_lock; /* protect below */
|
|
hrtime_t mmp_last_write; /* last successful MMP write */
|
|
uint64_t mmp_delay; /* decaying avg ns between MMP writes */
|
|
uberblock_t mmp_ub; /* last ub written by sync */
|
|
zio_t *mmp_zio_root; /* root of mmp write zios */
|
|
uint64_t mmp_kstat_id; /* unique id for next MMP write kstat */
|
|
int mmp_skip_error; /* reason for last skipped write */
|
|
vdev_t *mmp_last_leaf; /* last mmp write sent here */
|
|
uint64_t mmp_leaf_last_gen; /* last mmp write sent here */
|
|
uint32_t mmp_seq; /* intra-second update counter */
|
|
uint64_t mmp_tryimport_ns; /* tryimport activity check time */
|
|
uint64_t mmp_import_ns; /* import activity check time */
|
|
uint64_t mmp_claim_ns; /* claim activity check time */
|
|
} mmp_thread_t;
|
|
|
|
|
|
extern void mmp_init(struct spa *spa);
|
|
extern void mmp_fini(struct spa *spa);
|
|
extern void mmp_thread_start(struct spa *spa);
|
|
extern void mmp_thread_stop(struct spa *spa);
|
|
extern void mmp_update_uberblock(struct spa *spa, struct uberblock *ub);
|
|
extern void mmp_signal_all_threads(void);
|
|
extern int mmp_claim_uberblock(spa_t *spa, vdev_t *vd, uberblock_t *ub);
|
|
|
|
/* Global tuning */
|
|
extern int param_set_multihost_interval(ZFS_MODULE_PARAM_ARGS);
|
|
extern uint64_t zfs_multihost_interval;
|
|
extern uint_t zfs_multihost_fail_intervals;
|
|
extern uint_t zfs_multihost_import_intervals;
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _SYS_MMP_H */
|