mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-24 00:46:39 +03:00
zed: Fix config_sync autoexpand flood
Users were seeing floods of `config_sync` events when autoexpand was enabled. This happened because all "disk status change" udev events invoke the autoexpand codepath, which calls zpool_relabel_disk(), which in turn cause another "disk status change" event to happen, in a feedback loop. Note that "disk status change" happens every time a user calls close() on a block device. This commit breaks the feedback loop by only allowing an autoexpand to happen if the disk actually changed size. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes: #7132 Closes: #7366 Closes #13729
This commit is contained in:
parent
37f6845c6f
commit
e27e692bcc
@ -894,14 +894,90 @@ zfs_deliver_check(nvlist_t *nvl)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a path to a vdev, lookup the vdev's physical size from its
|
||||
* config nvlist.
|
||||
*
|
||||
* Returns the vdev's physical size in bytes on success, 0 on error.
|
||||
*/
|
||||
static uint64_t
|
||||
vdev_size_from_config(zpool_handle_t *zhp, const char *vdev_path)
|
||||
{
|
||||
nvlist_t *nvl = NULL;
|
||||
boolean_t avail_spare, l2cache, log;
|
||||
vdev_stat_t *vs = NULL;
|
||||
uint_t c;
|
||||
|
||||
nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache, &log);
|
||||
if (!nvl)
|
||||
return (0);
|
||||
|
||||
verify(nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_VDEV_STATS,
|
||||
(uint64_t **)&vs, &c) == 0);
|
||||
if (!vs) {
|
||||
zed_log_msg(LOG_INFO, "%s: no nvlist for '%s'", __func__,
|
||||
vdev_path);
|
||||
return (0);
|
||||
}
|
||||
|
||||
return (vs->vs_pspace);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a path to a vdev, lookup if the vdev is a "whole disk" in the
|
||||
* config nvlist. "whole disk" means that ZFS was passed a whole disk
|
||||
* at pool creation time, which it partitioned up and has full control over.
|
||||
* Thus a partition with wholedisk=1 set tells us that zfs created the
|
||||
* partition at creation time. A partition without whole disk set would have
|
||||
* been created by externally (like with fdisk) and passed to ZFS.
|
||||
*
|
||||
* Returns the whole disk value (either 0 or 1).
|
||||
*/
|
||||
static uint64_t
|
||||
vdev_whole_disk_from_config(zpool_handle_t *zhp, const char *vdev_path)
|
||||
{
|
||||
nvlist_t *nvl = NULL;
|
||||
boolean_t avail_spare, l2cache, log;
|
||||
uint64_t wholedisk;
|
||||
|
||||
nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache, &log);
|
||||
if (!nvl)
|
||||
return (0);
|
||||
|
||||
verify(nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_WHOLE_DISK,
|
||||
&wholedisk) == 0);
|
||||
|
||||
return (wholedisk);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the device size grew more than 1% then return true.
|
||||
*/
|
||||
#define DEVICE_GREW(oldsize, newsize) \
|
||||
((newsize > oldsize) && \
|
||||
((newsize / (newsize - oldsize)) <= 100))
|
||||
|
||||
static int
|
||||
zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
|
||||
{
|
||||
char *devname = data;
|
||||
boolean_t avail_spare, l2cache;
|
||||
nvlist_t *udev_nvl = data;
|
||||
nvlist_t *tgt;
|
||||
int error;
|
||||
|
||||
char *tmp_devname, devname[MAXPATHLEN];
|
||||
uint64_t guid;
|
||||
|
||||
if (nvlist_lookup_uint64(udev_nvl, ZFS_EV_VDEV_GUID, &guid) == 0) {
|
||||
sprintf(devname, "%llu", (u_longlong_t)guid);
|
||||
} else if (nvlist_lookup_string(udev_nvl, DEV_PHYS_PATH,
|
||||
&tmp_devname) == 0) {
|
||||
strlcpy(devname, tmp_devname, MAXPATHLEN);
|
||||
zfs_append_partition(devname, MAXPATHLEN);
|
||||
} else {
|
||||
zed_log_msg(LOG_INFO, "%s: no guid or physpath", __func__);
|
||||
}
|
||||
|
||||
zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'",
|
||||
devname, zpool_get_name(zhp));
|
||||
|
||||
@ -953,12 +1029,75 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
|
||||
vdev_state_t newstate;
|
||||
|
||||
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
|
||||
error = zpool_vdev_online(zhp, fullpath, 0,
|
||||
&newstate);
|
||||
zed_log_msg(LOG_INFO, "zfsdle_vdev_online: "
|
||||
"setting device '%s' to ONLINE state "
|
||||
"in pool '%s': %d", fullpath,
|
||||
zpool_get_name(zhp), error);
|
||||
/*
|
||||
* If this disk size has not changed, then
|
||||
* there's no need to do an autoexpand. To
|
||||
* check we look at the disk's size in its
|
||||
* config, and compare it to the disk size
|
||||
* that udev is reporting.
|
||||
*/
|
||||
uint64_t udev_size = 0, conf_size = 0,
|
||||
wholedisk = 0, udev_parent_size = 0;
|
||||
|
||||
/*
|
||||
* Get the size of our disk that udev is
|
||||
* reporting.
|
||||
*/
|
||||
if (nvlist_lookup_uint64(udev_nvl, DEV_SIZE,
|
||||
&udev_size) != 0) {
|
||||
udev_size = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the size of our disk's parent device
|
||||
* from udev (where sda1's parent is sda).
|
||||
*/
|
||||
if (nvlist_lookup_uint64(udev_nvl,
|
||||
DEV_PARENT_SIZE, &udev_parent_size) != 0) {
|
||||
udev_parent_size = 0;
|
||||
}
|
||||
|
||||
conf_size = vdev_size_from_config(zhp,
|
||||
fullpath);
|
||||
|
||||
wholedisk = vdev_whole_disk_from_config(zhp,
|
||||
fullpath);
|
||||
|
||||
/*
|
||||
* Only attempt an autoexpand if the vdev size
|
||||
* changed. There are two different cases
|
||||
* to consider.
|
||||
*
|
||||
* 1. wholedisk=1
|
||||
* If you do a 'zpool create' on a whole disk
|
||||
* (like /dev/sda), then zfs will create
|
||||
* partitions on the disk (like /dev/sda1). In
|
||||
* that case, wholedisk=1 will be set in the
|
||||
* partition's nvlist config. So zed will need
|
||||
* to see if your parent device (/dev/sda)
|
||||
* expanded in size, and if so, then attempt
|
||||
* the autoexpand.
|
||||
*
|
||||
* 2. wholedisk=0
|
||||
* If you do a 'zpool create' on an existing
|
||||
* partition, or a device that doesn't allow
|
||||
* partitions, then wholedisk=0, and you will
|
||||
* simply need to check if the device itself
|
||||
* expanded in size.
|
||||
*/
|
||||
if (DEVICE_GREW(conf_size, udev_size) ||
|
||||
(wholedisk && DEVICE_GREW(conf_size,
|
||||
udev_parent_size))) {
|
||||
error = zpool_vdev_online(zhp, fullpath,
|
||||
0, &newstate);
|
||||
|
||||
zed_log_msg(LOG_INFO,
|
||||
"%s: autoexpanding '%s' from %llu"
|
||||
" to %llu bytes in pool '%s': %d",
|
||||
__func__, fullpath, conf_size,
|
||||
MAX(udev_size, udev_parent_size),
|
||||
zpool_get_name(zhp), error);
|
||||
}
|
||||
}
|
||||
}
|
||||
zpool_close(zhp);
|
||||
@ -989,7 +1128,7 @@ zfs_deliver_dle(nvlist_t *nvl)
|
||||
zed_log_msg(LOG_INFO, "zfs_deliver_dle: no guid or physpath");
|
||||
}
|
||||
|
||||
if (zpool_iter(g_zfshdl, zfsdle_vdev_online, name) != 1) {
|
||||
if (zpool_iter(g_zfshdl, zfsdle_vdev_online, nvl) != 1) {
|
||||
zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not "
|
||||
"found", name);
|
||||
return (1);
|
||||
|
@ -78,6 +78,8 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
|
||||
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
|
||||
if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
|
||||
zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
|
||||
if (nvlist_lookup_uint64(nvl, DEV_PARENT_SIZE, &numval) == 0)
|
||||
zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_PARENT_SIZE, numval);
|
||||
if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
|
||||
zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
|
||||
if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
|
||||
@ -130,6 +132,20 @@ dev_event_nvlist(struct udev_device *dev)
|
||||
|
||||
numval *= strtoull(value, NULL, 10);
|
||||
(void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
|
||||
|
||||
/*
|
||||
* If the device has a parent, then get the parent block
|
||||
* device's size as well. For example, /dev/sda1's parent
|
||||
* is /dev/sda.
|
||||
*/
|
||||
struct udev_device *parent_dev = udev_device_get_parent(dev);
|
||||
if ((value = udev_device_get_sysattr_value(parent_dev, "size"))
|
||||
!= NULL) {
|
||||
uint64_t numval = DEV_BSIZE;
|
||||
|
||||
numval *= strtoull(value, NULL, 10);
|
||||
(void) nvlist_add_uint64(nvl, DEV_PARENT_SIZE, numval);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -244,6 +244,9 @@ extern "C" {
|
||||
#define DEV_PATH "path"
|
||||
#define DEV_IS_PART "is_slice"
|
||||
#define DEV_SIZE "dev_size"
|
||||
|
||||
/* Size of the whole parent block device (if dev is a partition) */
|
||||
#define DEV_PARENT_SIZE "dev_parent_size"
|
||||
#endif /* __linux__ */
|
||||
|
||||
#define EV_V1 1
|
||||
|
Loading…
Reference in New Issue
Block a user