mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
Add support for boot environment data to be stored in the label
Modern bootloaders leverage data stored in the root filesystem to enable some of their powerful features. GRUB specifically has a grubenv file which can store large amounts of configuration data that can be read and written at boot time and during normal operation. This allows sysadmins to configure useful features like automated failover after failed boot attempts. Unfortunately, due to the Copy-on-Write nature of ZFS, the standard behavior of these tools cannot handle writing to ZFS files safely at boot time. We need an alternative way to store data that allows the bootloader to make changes to the data. This work is very similar to work that was done on Illumos to enable similar functionality in the FreeBSD bootloader. This patch is different in that the data being stored is a raw grubenv file; this file can store arbitrary variables and values, and the scripting provided by grub is powerful enough that special structures are not required to implement advanced behavior. We repurpose the second padding area in each label to store the grubenv file, protected by an embedded checksum. We add two ioctls to get and set this data, and libzfs_core and libzfs functions to access them more easily. There are no direct command line interfaces to these functions; these will be added directly to the bootloader utilities. Reviewed-by: Pavel Zakharov <pavel.zakharov@delphix.com> Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Paul Dagnelie <pcd@delphix.com> Closes #10009
This commit is contained in:
+149
-7
@@ -21,7 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
*/
|
||||
|
||||
@@ -957,7 +957,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
|
||||
nvlist_t *label;
|
||||
vdev_phys_t *vp;
|
||||
abd_t *vp_abd;
|
||||
abd_t *pad2;
|
||||
abd_t *bootenv;
|
||||
uberblock_t *ub;
|
||||
abd_t *ub_abd;
|
||||
zio_t *zio;
|
||||
@@ -1118,8 +1118,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
|
||||
ub->ub_txg = 0;
|
||||
|
||||
/* Initialize the 2nd padding area. */
|
||||
pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE);
|
||||
abd_zero(pad2, VDEV_PAD_SIZE);
|
||||
bootenv = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE);
|
||||
abd_zero(bootenv, VDEV_PAD_SIZE);
|
||||
|
||||
/*
|
||||
* Write everything in parallel.
|
||||
@@ -1138,8 +1138,8 @@ retry:
|
||||
* Zero out the 2nd padding area where it might have
|
||||
* left over data from previous filesystem format.
|
||||
*/
|
||||
vdev_label_write(zio, vd, l, pad2,
|
||||
offsetof(vdev_label_t, vl_pad2),
|
||||
vdev_label_write(zio, vd, l, bootenv,
|
||||
offsetof(vdev_label_t, vl_be),
|
||||
VDEV_PAD_SIZE, NULL, NULL, flags);
|
||||
|
||||
vdev_label_write(zio, vd, l, ub_abd,
|
||||
@@ -1155,7 +1155,7 @@ retry:
|
||||
}
|
||||
|
||||
nvlist_free(label);
|
||||
abd_free(pad2);
|
||||
abd_free(bootenv);
|
||||
abd_free(ub_abd);
|
||||
abd_free(vp_abd);
|
||||
|
||||
@@ -1178,6 +1178,148 @@ retry:
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Done callback for vdev_label_read_bootenv_impl. If this is the first
|
||||
* callback to finish, store our abd in the callback pointer. Otherwise, we
|
||||
* just free our abd and return.
|
||||
*/
|
||||
static void
|
||||
vdev_label_read_bootenv_done(zio_t *zio)
|
||||
{
|
||||
zio_t *rio = zio->io_private;
|
||||
abd_t **cbp = rio->io_private;
|
||||
|
||||
ASSERT3U(zio->io_size, ==, VDEV_PAD_SIZE);
|
||||
|
||||
if (zio->io_error == 0) {
|
||||
mutex_enter(&rio->io_lock);
|
||||
if (*cbp == NULL) {
|
||||
/* Will free this buffer in vdev_label_read_bootenv. */
|
||||
*cbp = zio->io_abd;
|
||||
} else {
|
||||
abd_free(zio->io_abd);
|
||||
}
|
||||
mutex_exit(&rio->io_lock);
|
||||
} else {
|
||||
abd_free(zio->io_abd);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_label_read_bootenv_impl(zio_t *zio, vdev_t *vd, int flags)
|
||||
{
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_label_read_bootenv_impl(zio, vd->vdev_child[c], flags);
|
||||
|
||||
/*
|
||||
* We just use the first label that has a correct checksum; the
|
||||
* bootloader should have rewritten them all to be the same on boot,
|
||||
* and any changes we made since boot have been the same across all
|
||||
* labels.
|
||||
*
|
||||
* While grub supports writing to all four labels, other bootloaders
|
||||
* don't, so we only use the first two labels to store boot
|
||||
* information.
|
||||
*/
|
||||
if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
|
||||
for (int l = 0; l < VDEV_LABELS / 2; l++) {
|
||||
vdev_label_read(zio, vd, l,
|
||||
abd_alloc_linear(VDEV_PAD_SIZE, B_FALSE),
|
||||
offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE,
|
||||
vdev_label_read_bootenv_done, zio, flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *command)
|
||||
{
|
||||
spa_t *spa = rvd->vdev_spa;
|
||||
abd_t *abd = NULL;
|
||||
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
|
||||
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
|
||||
|
||||
ASSERT(command);
|
||||
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
|
||||
|
||||
zio_t *zio = zio_root(spa, NULL, &abd, flags);
|
||||
vdev_label_read_bootenv_impl(zio, rvd, flags);
|
||||
int err = zio_wait(zio);
|
||||
|
||||
if (abd != NULL) {
|
||||
vdev_boot_envblock_t *vbe = abd_to_buf(abd);
|
||||
if (vbe->vbe_version != VB_RAW) {
|
||||
abd_free(abd);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
vbe->vbe_bootenv[sizeof (vbe->vbe_bootenv) - 1] = '\0';
|
||||
fnvlist_add_string(command, "envmap", vbe->vbe_bootenv);
|
||||
/* abd was allocated in vdev_label_read_bootenv_impl() */
|
||||
abd_free(abd);
|
||||
/* If we managed to read any successfully, return success. */
|
||||
return (0);
|
||||
}
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
vdev_label_write_bootenv(vdev_t *vd, char *envmap)
|
||||
{
|
||||
zio_t *zio;
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
vdev_boot_envblock_t *bootenv;
|
||||
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
|
||||
int error = ENXIO;
|
||||
|
||||
if (strlen(envmap) >= sizeof (bootenv->vbe_bootenv)) {
|
||||
return (SET_ERROR(E2BIG));
|
||||
}
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
|
||||
|
||||
for (int c = 0; c < vd->vdev_children; c++) {
|
||||
int child_err = vdev_label_write_bootenv(vd->vdev_child[c],
|
||||
envmap);
|
||||
/*
|
||||
* As long as any of the disks managed to write all of their
|
||||
* labels successfully, return success.
|
||||
*/
|
||||
if (child_err == 0)
|
||||
error = child_err;
|
||||
}
|
||||
|
||||
if (!vd->vdev_ops->vdev_op_leaf || vdev_is_dead(vd) ||
|
||||
!vdev_writeable(vd)) {
|
||||
return (error);
|
||||
}
|
||||
ASSERT3U(sizeof (*bootenv), ==, VDEV_PAD_SIZE);
|
||||
abd_t *abd = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE);
|
||||
abd_zero(abd, VDEV_PAD_SIZE);
|
||||
bootenv = abd_borrow_buf_copy(abd, VDEV_PAD_SIZE);
|
||||
|
||||
char *buf = bootenv->vbe_bootenv;
|
||||
(void) strlcpy(buf, envmap, sizeof (bootenv->vbe_bootenv));
|
||||
bootenv->vbe_version = VB_RAW;
|
||||
abd_return_buf_copy(abd, bootenv, VDEV_PAD_SIZE);
|
||||
|
||||
retry:
|
||||
zio = zio_root(spa, NULL, NULL, flags);
|
||||
for (int l = 0; l < VDEV_LABELS / 2; l++) {
|
||||
vdev_label_write(zio, vd, l, abd,
|
||||
offsetof(vdev_label_t, vl_be),
|
||||
VDEV_PAD_SIZE, NULL, NULL, flags);
|
||||
}
|
||||
|
||||
error = zio_wait(zio);
|
||||
if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
|
||||
flags |= ZIO_FLAG_TRYHARD;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
abd_free(abd);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
* uberblock load/sync
|
||||
|
||||
Reference in New Issue
Block a user