zed: mark disks as REMOVED when they are removed

ZED does not take any action for disk removal events if there is no
spare VDEV available. Add zpool_vdev_remove_wanted() in libzfs
and vdev_remove_wanted() in vdev.c so that ZED can remove the VDEV
on a removal event.  This means that if you are running zed and
remove a disk, it will be properly marked as REMOVED.

Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
This commit is contained in:
Ameer Hamza
2022-09-27 04:32:42 +05:00
committed by Brian Behlendorf
parent 5219a2691e
commit bd9a9a4e1a
24 changed files with 395 additions and 51 deletions
+33
View File
@@ -48,6 +48,7 @@
#include <linux/mod_compat.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/misc.h>
char spl_gitrev[64] = ZFS_META_GITREV;
@@ -540,6 +541,38 @@ ddi_copyin(const void *from, void *to, size_t len, int flags)
}
EXPORT_SYMBOL(ddi_copyin);
/*
 * Send a KOBJ_CHANGE uevent to userspace for the kobject backing 'bdev'.
 *
 * This is posted whenever a new vdev is added to the pool.  It is needed to
 * sync blkid information with udev, which the zed daemon uses during device
 * hotplug to identify the vdev.
 *
 * A failure to deliver the uevent is only logged; nothing is returned to
 * the caller.
 */
void
spl_signal_kobj_evt(struct block_device *bdev)
{
#if !defined(HAVE_BDEV_KOBJ) && !defined(HAVE_PART_TO_DEV)
	/*
	 * Neither bdev_kobj() nor part_to_dev() is available in this kernel,
	 * most likely because of an API change that still needs to be
	 * chased down.
	 */
#error "Unsupported kernel: unable to get struct kobj from bdev"
#else
#if defined(HAVE_BDEV_KOBJ)
	struct kobject *kobj = bdev_kobj(bdev);
#else
	struct kobject *kobj = &part_to_dev(bdev->bd_part)->kobj;
#endif
	int err;

	/* Nothing to signal without a kobject. */
	if (kobj == NULL)
		return;

	err = kobject_uevent(kobj, KOBJ_CHANGE);
	if (err != 0) {
		pr_warn("ZFS: Sending event '%d' to kobject: '%s'"
		    " (%p): failed(ret:%d)\n", KOBJ_CHANGE,
		    kobject_name(kobj), kobj, err);
	}
#endif
}
EXPORT_SYMBOL(spl_signal_kobj_evt);
int
ddi_copyout(const void *from, void *to, size_t len, int flags)
{
+22 -2
View File
@@ -179,6 +179,18 @@ vdev_disk_error(zio_t *zio)
zio->io_flags);
}
/*
 * kobj-event hook for disk vdevs: forward the request to
 * spl_signal_kobj_evt() for the block device attached to 'v'.
 *
 * If the vdev has no vdev_disk_t, or the vdev_disk_t has no block device,
 * only a debug message is recorded.
 */
static void
vdev_disk_kobj_evt_post(vdev_t *v)
{
	vdev_disk_t *disk = v->vdev_tsd;

	if (disk == NULL || disk->vd_bdev == NULL) {
		vdev_dbgmsg(v, "vdev_disk_t is NULL for VDEV:%s\n",
		    v->vdev_path);
		return;
	}

	spl_signal_kobj_evt(disk->vd_bdev);
}
static int
vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
uint64_t *logical_ashift, uint64_t *physical_ashift)
@@ -290,6 +302,13 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
bdev = blkdev_get_by_path(v->vdev_path, mode | FMODE_EXCL,
zfs_vdev_holder);
if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
/*
* There is no point of waiting since device is removed
* explicitly
*/
if (v->vdev_removed)
break;
schedule_timeout(MSEC_TO_TICK(10));
} else if (unlikely(PTR_ERR(bdev) == -ERESTARTSYS)) {
timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms * 10);
@@ -899,7 +918,7 @@ vdev_disk_io_done(zio_t *zio)
vdev_t *v = zio->io_vd;
vdev_disk_t *vd = v->vdev_tsd;
if (zfs_check_media_change(vd->vd_bdev)) {
if (!zfs_check_disk_status(vd->vd_bdev)) {
invalidate_bdev(vd->vd_bdev);
v->vdev_remove_wanted = B_TRUE;
spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
@@ -955,7 +974,8 @@ vdev_ops_t vdev_disk_ops = {
.vdev_op_nparity = NULL,
.vdev_op_ndisks = NULL,
.vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */
.vdev_op_leaf = B_TRUE /* leaf vdev */
.vdev_op_leaf = B_TRUE, /* leaf vdev */
.vdev_op_kobj_evt_post = vdev_disk_kobj_evt_post
};
/*
+14 -5
View File
@@ -909,7 +909,16 @@ spa_change_guid(spa_t *spa)
spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED);
if (error == 0) {
spa_write_cachefile(spa, B_FALSE, B_TRUE);
/*
* Clear the kobj flag from all the vdevs to allow
* vdev_cache_process_kobj_evt() to post events to all the
* vdevs since GUID is updated.
*/
vdev_clear_kobj_evt(spa->spa_root_vdev);
for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
vdev_clear_kobj_evt(spa->spa_l2cache.sav_vdevs[i]);
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);
spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID);
}
@@ -5192,7 +5201,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
*/
spa_unload(spa);
spa_deactivate(spa);
spa_write_cachefile(spa, B_TRUE, B_TRUE);
spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
spa_remove(spa);
if (locked)
mutex_exit(&spa_namespace_lock);
@@ -6012,7 +6021,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa_spawn_aux_threads(spa);
spa_write_cachefile(spa, B_FALSE, B_TRUE);
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);
/*
* Don't count references from objsets that are already closed
@@ -6073,7 +6082,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
if (props != NULL)
spa_configfile_set(spa, props, B_FALSE);
spa_write_cachefile(spa, B_FALSE, B_TRUE);
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT);
zfs_dbgmsg("spa_import: verbatim import of %s", pool);
mutex_exit(&spa_namespace_lock);
@@ -6465,7 +6474,7 @@ export_spa:
if (new_state != POOL_STATE_UNINITIALIZED) {
if (!hardforce)
spa_write_cachefile(spa, B_TRUE, B_TRUE);
spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
spa_remove(spa);
} else {
/*
+13 -1
View File
@@ -238,7 +238,8 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
* would be required.
*/
void
spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent,
boolean_t postblkidevent)
{
spa_config_dirent_t *dp, *tdp;
nvlist_t *nvl;
@@ -344,6 +345,16 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
if (postsysevent)
spa_event_notify(target, NULL, NULL, ESC_ZFS_CONFIG_SYNC);
/*
* Post udev event to sync blkid information if the pool is created
* or a new vdev is added to the pool.
*/
if ((target->spa_root_vdev) && postblkidevent) {
vdev_post_kobj_evt(target->spa_root_vdev);
for (int i = 0; i < target->spa_l2cache.sav_count; i++)
vdev_post_kobj_evt(target->spa_l2cache.sav_vdevs[i]);
}
}
/*
@@ -598,6 +609,7 @@ spa_config_update(spa_t *spa, int what)
*/
if (!spa->spa_is_root) {
spa_write_cachefile(spa, B_FALSE,
what != SPA_CONFIG_UPDATE_POOL,
what != SPA_CONFIG_UPDATE_POOL);
}
+2 -2
View File
@@ -1291,7 +1291,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
* If the config changed, update the config cache.
*/
if (config_changed)
spa_write_cachefile(spa, B_FALSE, B_TRUE);
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
}
/*
@@ -1386,7 +1386,7 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
*/
if (config_changed) {
mutex_enter(&spa_namespace_lock);
spa_write_cachefile(spa, B_FALSE, B_TRUE);
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
mutex_exit(&spa_namespace_lock);
}
+59
View File
@@ -1938,6 +1938,14 @@ vdev_open(vdev_t *vd)
error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize,
&logical_ashift, &physical_ashift);
/* Keep the device in removed state if unplugged */
if (error == ENOENT && vd->vdev_removed) {
vdev_set_state(vd, B_TRUE, VDEV_STATE_REMOVED,
VDEV_AUX_NONE);
return (error);
}
/*
* Physical volume size should never be larger than its max size, unless
* the disk has shrunk while we were reading it or the device is buggy
@@ -3156,6 +3164,34 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
mutex_exit(&vd->vdev_dtl_lock);
}
/*
 * Walk the vdev tree rooted at 'vd' (the vdev itself first, then each
 * child in order) and post a kobj event for every vdev whose ops provide
 * vdev_op_kobj_evt_post and whose vdev_kobj_flag is still B_FALSE.
 *
 * The flag is set to B_TRUE before the hook is called, so a vdev is not
 * signalled again until its flag is cleared.
 *
 * NOTE(review): nothing in this function filters out spares; any such
 * exclusion depends on which trees the caller passes in.
 */
void
vdev_post_kobj_evt(vdev_t *vd)
{
	if (vd->vdev_kobj_flag == B_FALSE) {
		if (vd->vdev_ops->vdev_op_kobj_evt_post != NULL) {
			vd->vdev_kobj_flag = B_TRUE;
			vd->vdev_ops->vdev_op_kobj_evt_post(vd);
		}
	}

	for (int i = 0; i < vd->vdev_children; i++) {
		vdev_t *child = vd->vdev_child[i];

		vdev_post_kobj_evt(child);
	}
}
/*
 * Reset vdev_kobj_flag to B_FALSE on 'vd' and, recursively, on every vdev
 * beneath it, so that a later vdev_post_kobj_evt() pass will post events
 * for them again.
 *
 * NOTE(review): nothing in this function filters out spares; any such
 * exclusion depends on which trees the caller passes in.
 */
void
vdev_clear_kobj_evt(vdev_t *vd)
{
	vd->vdev_kobj_flag = B_FALSE;

	for (int i = 0; i < vd->vdev_children; i++) {
		vdev_t *child = vd->vdev_child[i];

		vdev_clear_kobj_evt(child);
	}
}
int
vdev_dtl_load(vdev_t *vd)
{
@@ -3936,6 +3972,29 @@ vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux)
return (spa_vdev_state_exit(spa, vd, 0));
}
/*
 * Request removal of the vdev identified by 'guid' in pool 'spa'.
 *
 * The vdev is flagged with vdev_remove_wanted and an SPA_ASYNC_REMOVE
 * request is queued.  If the vdev is already marked removed, nothing is
 * changed.
 *
 * Returns the result of spa_vdev_state_exit(), which is passed ENODEV
 * when no vdev with that guid is found and 0 otherwise.
 */
int
vdev_remove_wanted(spa_t *spa, uint64_t guid)
{
	vdev_t *target;

	spa_vdev_state_enter(spa, SCL_NONE);

	target = spa_lookup_by_guid(spa, guid, B_TRUE);
	if (target == NULL)
		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));

	if (!target->vdev_removed) {
		target->vdev_remove_wanted = B_TRUE;
		spa_async_request(spa, SPA_ASYNC_REMOVE);

		return (spa_vdev_state_exit(spa, target, 0));
	}

	/* The vdev is already removed, so there is nothing to do. */
	return (spa_vdev_state_exit(spa, NULL, 0));
}
/*
* Online the given vdev.
*
+5 -1
View File
@@ -1921,6 +1921,10 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
break;
case VDEV_STATE_REMOVED:
error = vdev_remove_wanted(spa, zc->zc_guid);
break;
default:
error = SET_ERROR(EINVAL);
}
@@ -2928,7 +2932,7 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc)
mutex_enter(&spa_namespace_lock);
if ((spa = spa_lookup(zc->zc_name)) != NULL) {
spa_configfile_set(spa, props, B_FALSE);
spa_write_cachefile(spa, B_FALSE, B_TRUE);
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
}
mutex_exit(&spa_namespace_lock);
if (spa != NULL) {
+1 -1
View File
@@ -3928,7 +3928,7 @@ zio_vdev_io_done(zio_t *zio)
ops->vdev_op_io_done(zio);
if (unexpected_error)
if (unexpected_error && vd->vdev_remove_wanted == B_FALSE)
VERIFY(vdev_probe(vd, zio) == NULL);
return (zio);