mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-31 21:39:36 +03:00
060f0226e6
When Multihost is enabled and a pool is imported, uberblock writes include ub_mmp_delay to allow an importing node to calculate the duration of an activity test. This value alone is not enough information. If zfs_multihost_fail_intervals > 0 on the node with the pool imported, the safe minimum duration of the activity test is well defined, but does not depend on ub_mmp_delay: it is zfs_multihost_fail_intervals * zfs_multihost_interval. If zfs_multihost_fail_intervals == 0 on that node, there is no such well-defined safe duration, and the importing host cannot tell whether mmp_delay is high due to I/O delays, or due to a very large zfs_multihost_interval setting on the host which last imported the pool. As a result, it may use a far longer period for the activity test than is necessary. This patch renames ub_mmp_sequence to ub_mmp_config and uses it to record the zfs_multihost_interval and zfs_multihost_fail_intervals values, as well as the mmp sequence. This allows a shorter activity test duration to be calculated by the importing host in most situations. These values are also added to the multihost_history kstat records. It calculates the activity test duration differently depending on whether the new fields are present or not; for importing pools with only ub_mmp_delay, it uses (zfs_multihost_interval + ub_mmp_delay) * zfs_multihost_import_intervals, which results in an activity test duration less sensitive to the leaf count. In addition, it makes a few other improvements: * It updates the "sequence" part of ub_mmp_config when MMP writes in between syncs occur. This allows an importing host to detect MMP on the remote host sooner, when the pool is idle, as it is not limited to the granularity of ub_timestamp (1 second). * It issues writes immediately when zfs_multihost_interval is changed so remote hosts see the updated value as soon as possible. * It fixes a bug where setting zfs_multihost_fail_intervals = 1 results in immediate pool suspension.
* Update tests to verify activity check duration is based on recorded tunable values, not tunable values on importing host. * Update tests to verify the expected number of uberblocks have valid MMP fields - fail_intervals, mmp_interval, mmp_seq (sequence number), that sequence number is incrementing, and that uberblock values match tunable settings. Reviewed-by: Andreas Dilger <andreas.dilger@whamcloud.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Signed-off-by: Olaf Faaland <faaland1@llnl.gov> Closes #7842
75 lines
2.2 KiB
C
75 lines
2.2 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
/*
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
* Copyright (c) 2013, 2017 by Delphix. All rights reserved.
|
|
*/
|
|
|
|
#include <sys/zfs_context.h>
|
|
#include <sys/uberblock_impl.h>
|
|
#include <sys/vdev_impl.h>
|
|
#include <sys/mmp.h>
|
|
|
|
int
|
|
uberblock_verify(uberblock_t *ub)
|
|
{
|
|
if (ub->ub_magic == BSWAP_64((uint64_t)UBERBLOCK_MAGIC))
|
|
byteswap_uint64_array(ub, sizeof (uberblock_t));
|
|
|
|
if (ub->ub_magic != UBERBLOCK_MAGIC)
|
|
return (SET_ERROR(EINVAL));
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Update the uberblock and return TRUE if anything changed in this
|
|
* transaction group.
|
|
*/
|
|
boolean_t
|
|
uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg, uint64_t mmp_delay)
|
|
{
|
|
ASSERT(ub->ub_txg < txg);
|
|
|
|
/*
|
|
* We explicitly do not set ub_version here, so that older versions
|
|
* continue to be written with the previous uberblock version.
|
|
*/
|
|
ub->ub_magic = UBERBLOCK_MAGIC;
|
|
ub->ub_txg = txg;
|
|
ub->ub_guid_sum = rvd->vdev_guid_sum;
|
|
ub->ub_timestamp = gethrestime_sec();
|
|
ub->ub_software_version = SPA_VERSION;
|
|
ub->ub_mmp_magic = MMP_MAGIC;
|
|
if (spa_multihost(rvd->vdev_spa)) {
|
|
ub->ub_mmp_delay = mmp_delay;
|
|
ub->ub_mmp_config = MMP_SEQ_SET(0) |
|
|
MMP_INTERVAL_SET(zfs_multihost_interval) |
|
|
MMP_FAIL_INT_SET(zfs_multihost_fail_intervals);
|
|
} else {
|
|
ub->ub_mmp_delay = 0;
|
|
ub->ub_mmp_config = 0;
|
|
}
|
|
ub->ub_checkpoint_txg = 0;
|
|
|
|
return (ub->ub_rootbp.blk_birth == txg);
|
|
}
|