zpool: Add slot power control, print power status

Add `zpool` flags to control the slot power to drives.  This assumes
your SAS or NVMe enclosure supports slot power control via sysfs.

The new `--power` flag is added to `zpool offline|online|clear`:

    zpool offline --power <pool> <device>    Turn off device slot power
    zpool online --power <pool> <device>     Turn on device slot power
    zpool clear --power <pool> [device]      Turn on device slot power

If the ZPOOL_AUTO_POWER_ON_SLOT env var is set, then the '--power'
option is automatically implied for `zpool online` and `zpool clear`
and does not need to be passed.

zpool status also gets a --power option to print the slot power status.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mart Frauenlob <AllKind@fastest.cc>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #15662
This commit is contained in:
Tony Hutter
2023-12-21 10:53:16 -08:00
committed by Brian Behlendorf
parent 6b33ae072c
commit 0bc841fb59
15 changed files with 794 additions and 48 deletions
+44 -5
View File
@@ -2808,6 +2808,9 @@ zpool_vdev_is_interior(const char *name)
return (B_FALSE);
}
/*
* Lookup the nvlist for a given vdev.
*/
nvlist_t *
zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
boolean_t *l2cache, boolean_t *log)
@@ -2815,6 +2818,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
char *end;
nvlist_t *nvroot, *search, *ret;
uint64_t guid;
boolean_t __avail_spare, __l2cache, __log;
verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
@@ -2830,6 +2834,18 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
/*
* User can pass NULL for avail_spare, l2cache, and log, but
* we still need to provide variables to vdev_to_nvlist_iter(), so
* just point them to junk variables here.
*/
if (!avail_spare)
avail_spare = &__avail_spare;
if (!l2cache)
l2cache = &__l2cache;
if (!log)
log = &__log;
*avail_spare = B_FALSE;
*l2cache = B_FALSE;
if (log != NULL)
@@ -3233,21 +3249,23 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
}
/*
* Mark the given vdev degraded.
* Generic set vdev state function
*/
int
zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
static int
zpool_vdev_set_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux,
vdev_state_t state)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
dgettext(TEXT_DOMAIN, "cannot set %s %llu"),
zpool_state_to_name(state, aux), (u_longlong_t)guid);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_guid = guid;
zc.zc_cookie = VDEV_STATE_DEGRADED;
zc.zc_cookie = state;
zc.zc_obj = aux;
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
@@ -3256,6 +3274,27 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
return (zpool_standard_error(hdl, errno, msg));
}
/*
* Mark the given vdev degraded.
*/
int
zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_DEGRADED));
}
/*
* Mark the given vdev as in a removed state (as if the device does not exist).
*
* This is different than zpool_vdev_remove() which does a removal of a device
* from the pool (but the device does exist).
*/
int
zpool_vdev_set_removed_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_REMOVED));
}
/*
* Returns TRUE if the given nvlist is a vdev that was originally swapped in as
* a hot spare.
+30 -10
View File
@@ -188,25 +188,17 @@ zpool_open_func(void *arg)
if (rn->rn_labelpaths) {
char *path = NULL;
char *devid = NULL;
char *env = NULL;
rdsk_node_t *slice;
avl_index_t where;
int timeout;
int error;
if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
return;
env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 ||
timeout < 0) {
timeout = DISK_LABEL_WAIT;
}
/*
* Allow devlinks to stabilize so all paths are available.
*/
zpool_label_disk_wait(rn->rn_name, timeout);
zpool_disk_wait(rn->rn_name);
if (path != NULL) {
slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
@@ -700,6 +692,20 @@ zpool_label_disk_wait(const char *path, int timeout_ms)
#endif /* HAVE_LIBUDEV */
}
/*
* Simplified version of zpool_label_disk_wait() where we wait for a device
* to appear using the default timeouts.
*/
int
zpool_disk_wait(const char *path)
{
int timeout;
timeout = zpool_getenv_int("ZPOOL_IMPORT_UDEV_TIMEOUT_MS",
DISK_LABEL_WAIT);
return (zpool_label_disk_wait(path, timeout));
}
/*
* Encode the persistent devices strings
* used for the vdev disk label
@@ -782,6 +788,10 @@ no_dev:
* in the nvlist * (if applicable). Like:
* vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
*
* If an old path was in the nvlist, and the rescan can not find a new path,
* then keep the old path, since the disk may have been removed.
*
* path: The vdev path (value from ZPOOL_CONFIG_PATH)
* key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH)
*/
void
@@ -789,6 +799,9 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
const char *key)
{
char *upath, *spath;
char *oldpath = NULL;
(void) nvlist_lookup_string(nv, key, &oldpath);
/* Add enclosure sysfs path (if disk is in an enclosure). */
upath = zfs_get_underlying_path(path);
@@ -797,7 +810,14 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
if (spath) {
(void) nvlist_add_string(nv, key, spath);
} else {
(void) nvlist_remove_all(nv, key);
/*
* We couldn't dynamically scan the disk's enclosure sysfs path.
* This could be because the disk went away. If there's an old
* enclosure sysfs path in the nvlist, then keep using it.
*/
if (!oldpath) {
(void) nvlist_remove_all(nv, key);
}
}
free(upath);
+98
View File
@@ -1836,6 +1836,104 @@ zpool_find_config(void *hdl, const char *target, nvlist_t **configp,
return (0);
}
/* Return if a vdev is a leaf vdev. Note: draid spares are leaf vdevs. */
static boolean_t
vdev_is_leaf(nvlist_t *nv)
{
uint_t children = 0;
nvlist_t **child;
(void) nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children);
return (children == 0);
}
/* Return if a vdev is a leaf vdev and a real device (disk or file) */
static boolean_t
vdev_is_real_leaf(nvlist_t *nv)
{
char *type = NULL;
if (!vdev_is_leaf(nv))
return (B_FALSE);
(void) nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type);
if ((strcmp(type, VDEV_TYPE_DISK) == 0) ||
(strcmp(type, VDEV_TYPE_FILE) == 0)) {
return (B_TRUE);
}
return (B_FALSE);
}
/*
* This function is called by our FOR_EACH_VDEV() macros.
*
* state: State machine status (stored inside of a (nvlist_t *))
* nv: The current vdev nvlist_t we are iterating over.
* last_nv: The previous vdev nvlist_t we returned to the user in
* the last iteration of FOR_EACH_VDEV(). We use it
* to find the next vdev nvlist_t we should return.
* real_leaves_only: Only return leaf vdevs.
*
* Returns 1 if we found the next vdev nvlist_t for this iteration. 0 if
* we're still searching for it.
*/
static int
__for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv,
boolean_t real_leaves_only)
{
enum {FIRST_NV = 0, NEXT_IS_MATCH = 1, STOP_LOOKING = 2};
/* The very first entry in the NV list is a special case */
if (*((nvlist_t **)state) == (nvlist_t *)FIRST_NV) {
if (real_leaves_only && !vdev_is_real_leaf(nv))
return (0);
*((nvlist_t **)last_nv) = nv;
*((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING;
return (1);
}
/*
* We came across our last_nv, meaning the next one is the one we
* want
*/
if (nv == *((nvlist_t **)last_nv)) {
/* Next iteration of this function will return the nvlist_t */
*((nvlist_t **)state) = (nvlist_t *)NEXT_IS_MATCH;
return (0);
}
/*
* We marked NEXT_IS_MATCH on the previous iteration, so this is the one
* we want.
*/
if (*(nvlist_t **)state == (nvlist_t *)NEXT_IS_MATCH) {
if (real_leaves_only && !vdev_is_real_leaf(nv))
return (0);
*((nvlist_t **)last_nv) = nv;
*((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING;
return (1);
}
return (0);
}
int
for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv)
{
return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_FALSE));
}
int
for_each_real_leaf_vdev_macro_helper_func(void *state, nvlist_t *nv,
void *last_nv)
{
return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_TRUE));
}
/*
* Internal function for iterating over the vdevs.
*
+31
View File
@@ -28,6 +28,7 @@
#include <string.h>
#include <sys/nvpair.h>
#include <sys/fs/zfs.h>
#include <math.h>
#include <libzutil.h>
@@ -143,3 +144,33 @@ zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
*leftover = bytes_read;
return (0);
}
/*
* Floating point sleep(). Allows you to pass in a floating point value for
* seconds.
*/
void
fsleep(float sec)
{
struct timespec req;
req.tv_sec = floor(sec);
req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC;
nanosleep(&req, NULL);
}
/*
* Get environment variable 'env' and return it as an integer.
* If 'env' is not set, then return 'default_val' instead.
*/
int
zpool_getenv_int(const char *env, int default_val)
{
char *str;
int val;
str = getenv(env);
if ((str == NULL) || sscanf(str, "%d", &val) != 1 ||
val < 0) {
val = default_val;
}
return (val);
}