mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-12 19:20:28 +03:00
zed: Fix config_sync autoexpand flood
Users were seeing floods of `config_sync` events when autoexpand was enabled. This happened because all "disk status change" udev events invoke the autoexpand codepath, which calls zpool_relabel_disk(), which in turn cause another "disk status change" event to happen, in a feedback loop. Note that "disk status change" happens every time a user calls close() on a block device. This commit breaks the feedback loop by only allowing an autoexpand to happen if the disk actually changed size. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes: #7132 Closes: #7366 Closes #13729
This commit is contained in:
parent
37f6845c6f
commit
e27e692bcc
@ -894,14 +894,90 @@ zfs_deliver_check(nvlist_t *nvl)
|
|||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Given a path to a vdev, lookup the vdev's physical size from its
|
||||||
|
* config nvlist.
|
||||||
|
*
|
||||||
|
* Returns the vdev's physical size in bytes on success, 0 on error.
|
||||||
|
*/
|
||||||
|
static uint64_t
|
||||||
|
vdev_size_from_config(zpool_handle_t *zhp, const char *vdev_path)
|
||||||
|
{
|
||||||
|
nvlist_t *nvl = NULL;
|
||||||
|
boolean_t avail_spare, l2cache, log;
|
||||||
|
vdev_stat_t *vs = NULL;
|
||||||
|
uint_t c;
|
||||||
|
|
||||||
|
nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache, &log);
|
||||||
|
if (!nvl)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
verify(nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_VDEV_STATS,
|
||||||
|
(uint64_t **)&vs, &c) == 0);
|
||||||
|
if (!vs) {
|
||||||
|
zed_log_msg(LOG_INFO, "%s: no nvlist for '%s'", __func__,
|
||||||
|
vdev_path);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (vs->vs_pspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Given a path to a vdev, lookup if the vdev is a "whole disk" in the
|
||||||
|
* config nvlist. "whole disk" means that ZFS was passed a whole disk
|
||||||
|
* at pool creation time, which it partitioned up and has full control over.
|
||||||
|
* Thus a partition with wholedisk=1 set tells us that zfs created the
|
||||||
|
* partition at creation time. A partition without whole disk set would have
|
||||||
|
* been created by externally (like with fdisk) and passed to ZFS.
|
||||||
|
*
|
||||||
|
* Returns the whole disk value (either 0 or 1).
|
||||||
|
*/
|
||||||
|
static uint64_t
|
||||||
|
vdev_whole_disk_from_config(zpool_handle_t *zhp, const char *vdev_path)
|
||||||
|
{
|
||||||
|
nvlist_t *nvl = NULL;
|
||||||
|
boolean_t avail_spare, l2cache, log;
|
||||||
|
uint64_t wholedisk;
|
||||||
|
|
||||||
|
nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache, &log);
|
||||||
|
if (!nvl)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
verify(nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_WHOLE_DISK,
|
||||||
|
&wholedisk) == 0);
|
||||||
|
|
||||||
|
return (wholedisk);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the device size grew more than 1% then return true.
|
||||||
|
*/
|
||||||
|
#define DEVICE_GREW(oldsize, newsize) \
|
||||||
|
((newsize > oldsize) && \
|
||||||
|
((newsize / (newsize - oldsize)) <= 100))
|
||||||
|
|
||||||
static int
|
static int
|
||||||
zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
|
zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
|
||||||
{
|
{
|
||||||
char *devname = data;
|
|
||||||
boolean_t avail_spare, l2cache;
|
boolean_t avail_spare, l2cache;
|
||||||
|
nvlist_t *udev_nvl = data;
|
||||||
nvlist_t *tgt;
|
nvlist_t *tgt;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
|
char *tmp_devname, devname[MAXPATHLEN];
|
||||||
|
uint64_t guid;
|
||||||
|
|
||||||
|
if (nvlist_lookup_uint64(udev_nvl, ZFS_EV_VDEV_GUID, &guid) == 0) {
|
||||||
|
sprintf(devname, "%llu", (u_longlong_t)guid);
|
||||||
|
} else if (nvlist_lookup_string(udev_nvl, DEV_PHYS_PATH,
|
||||||
|
&tmp_devname) == 0) {
|
||||||
|
strlcpy(devname, tmp_devname, MAXPATHLEN);
|
||||||
|
zfs_append_partition(devname, MAXPATHLEN);
|
||||||
|
} else {
|
||||||
|
zed_log_msg(LOG_INFO, "%s: no guid or physpath", __func__);
|
||||||
|
}
|
||||||
|
|
||||||
zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'",
|
zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'",
|
||||||
devname, zpool_get_name(zhp));
|
devname, zpool_get_name(zhp));
|
||||||
|
|
||||||
@ -953,12 +1029,75 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
|
|||||||
vdev_state_t newstate;
|
vdev_state_t newstate;
|
||||||
|
|
||||||
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
|
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
|
||||||
error = zpool_vdev_online(zhp, fullpath, 0,
|
/*
|
||||||
&newstate);
|
* If this disk size has not changed, then
|
||||||
zed_log_msg(LOG_INFO, "zfsdle_vdev_online: "
|
* there's no need to do an autoexpand. To
|
||||||
"setting device '%s' to ONLINE state "
|
* check we look at the disk's size in its
|
||||||
"in pool '%s': %d", fullpath,
|
* config, and compare it to the disk size
|
||||||
zpool_get_name(zhp), error);
|
* that udev is reporting.
|
||||||
|
*/
|
||||||
|
uint64_t udev_size = 0, conf_size = 0,
|
||||||
|
wholedisk = 0, udev_parent_size = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the size of our disk that udev is
|
||||||
|
* reporting.
|
||||||
|
*/
|
||||||
|
if (nvlist_lookup_uint64(udev_nvl, DEV_SIZE,
|
||||||
|
&udev_size) != 0) {
|
||||||
|
udev_size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the size of our disk's parent device
|
||||||
|
* from udev (where sda1's parent is sda).
|
||||||
|
*/
|
||||||
|
if (nvlist_lookup_uint64(udev_nvl,
|
||||||
|
DEV_PARENT_SIZE, &udev_parent_size) != 0) {
|
||||||
|
udev_parent_size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
conf_size = vdev_size_from_config(zhp,
|
||||||
|
fullpath);
|
||||||
|
|
||||||
|
wholedisk = vdev_whole_disk_from_config(zhp,
|
||||||
|
fullpath);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Only attempt an autoexpand if the vdev size
|
||||||
|
* changed. There are two different cases
|
||||||
|
* to consider.
|
||||||
|
*
|
||||||
|
* 1. wholedisk=1
|
||||||
|
* If you do a 'zpool create' on a whole disk
|
||||||
|
* (like /dev/sda), then zfs will create
|
||||||
|
* partitions on the disk (like /dev/sda1). In
|
||||||
|
* that case, wholedisk=1 will be set in the
|
||||||
|
* partition's nvlist config. So zed will need
|
||||||
|
* to see if your parent device (/dev/sda)
|
||||||
|
* expanded in size, and if so, then attempt
|
||||||
|
* the autoexpand.
|
||||||
|
*
|
||||||
|
* 2. wholedisk=0
|
||||||
|
* If you do a 'zpool create' on an existing
|
||||||
|
* partition, or a device that doesn't allow
|
||||||
|
* partitions, then wholedisk=0, and you will
|
||||||
|
* simply need to check if the device itself
|
||||||
|
* expanded in size.
|
||||||
|
*/
|
||||||
|
if (DEVICE_GREW(conf_size, udev_size) ||
|
||||||
|
(wholedisk && DEVICE_GREW(conf_size,
|
||||||
|
udev_parent_size))) {
|
||||||
|
error = zpool_vdev_online(zhp, fullpath,
|
||||||
|
0, &newstate);
|
||||||
|
|
||||||
|
zed_log_msg(LOG_INFO,
|
||||||
|
"%s: autoexpanding '%s' from %llu"
|
||||||
|
" to %llu bytes in pool '%s': %d",
|
||||||
|
__func__, fullpath, conf_size,
|
||||||
|
MAX(udev_size, udev_parent_size),
|
||||||
|
zpool_get_name(zhp), error);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
zpool_close(zhp);
|
zpool_close(zhp);
|
||||||
@ -989,7 +1128,7 @@ zfs_deliver_dle(nvlist_t *nvl)
|
|||||||
zed_log_msg(LOG_INFO, "zfs_deliver_dle: no guid or physpath");
|
zed_log_msg(LOG_INFO, "zfs_deliver_dle: no guid or physpath");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zpool_iter(g_zfshdl, zfsdle_vdev_online, name) != 1) {
|
if (zpool_iter(g_zfshdl, zfsdle_vdev_online, nvl) != 1) {
|
||||||
zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not "
|
zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not "
|
||||||
"found", name);
|
"found", name);
|
||||||
return (1);
|
return (1);
|
||||||
|
@ -78,6 +78,8 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
|
|||||||
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
|
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
|
||||||
if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
|
if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
|
||||||
zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
|
zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
|
||||||
|
if (nvlist_lookup_uint64(nvl, DEV_PARENT_SIZE, &numval) == 0)
|
||||||
|
zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_PARENT_SIZE, numval);
|
||||||
if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
|
if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
|
||||||
zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
|
zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
|
||||||
if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
|
if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
|
||||||
@ -130,6 +132,20 @@ dev_event_nvlist(struct udev_device *dev)
|
|||||||
|
|
||||||
numval *= strtoull(value, NULL, 10);
|
numval *= strtoull(value, NULL, 10);
|
||||||
(void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
|
(void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the device has a parent, then get the parent block
|
||||||
|
* device's size as well. For example, /dev/sda1's parent
|
||||||
|
* is /dev/sda.
|
||||||
|
*/
|
||||||
|
struct udev_device *parent_dev = udev_device_get_parent(dev);
|
||||||
|
if ((value = udev_device_get_sysattr_value(parent_dev, "size"))
|
||||||
|
!= NULL) {
|
||||||
|
uint64_t numval = DEV_BSIZE;
|
||||||
|
|
||||||
|
numval *= strtoull(value, NULL, 10);
|
||||||
|
(void) nvlist_add_uint64(nvl, DEV_PARENT_SIZE, numval);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -244,6 +244,9 @@ extern "C" {
|
|||||||
#define DEV_PATH "path"
|
#define DEV_PATH "path"
|
||||||
#define DEV_IS_PART "is_slice"
|
#define DEV_IS_PART "is_slice"
|
||||||
#define DEV_SIZE "dev_size"
|
#define DEV_SIZE "dev_size"
|
||||||
|
|
||||||
|
/* Size of the whole parent block device (if dev is a partition) */
|
||||||
|
#define DEV_PARENT_SIZE "dev_parent_size"
|
||||||
#endif /* __linux__ */
|
#endif /* __linux__ */
|
||||||
|
|
||||||
#define EV_V1 1
|
#define EV_V1 1
|
||||||
|
Loading…
Reference in New Issue
Block a user