mirror_zfs/cmd/zpool/os/linux/zpool_vdev_os.c
grembo 65d9212aee
FreeBSD boot code reminder after zpool upgrade
There used to be a warning after upgrading a zpool in FreeBSD, so users
won't forget to update the boot loader that pool is booted from.

This change brings this warning back, but only if the bootfs property
is set on the pool, which should be sufficient for the vast majority of
FreeBSD installations. People running something custom are most likely
aware of what to do after an upgrade in their specific environment.

Functionality is implemented in an OS specific helper function.

Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Co-authored-by: Michael Gmelin <grembo@FreeBSD.org>
Signed-off-by: Michael Gmelin <grembo@FreeBSD.org>
Closes #12099
Closes #12104
2021-06-01 15:03:49 -06:00

413 lines
12 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
* Copyright (c) 2016, 2017 Intel Corporation.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
*/
/*
* Functions to convert between a list of vdevs and an nvlist representing the
* configuration. Each entry in the list can be one of:
*
* Device vdevs
* disk=(path=..., devid=...)
* file=(path=...)
*
* Group vdevs
* raidz[1|2]=(...)
* mirror=(...)
*
* Hot spares
*
* While the underlying implementation supports it, group vdevs cannot contain
* other group vdevs. All userland verification of devices is contained within
* this file. If successful, the nvlist returned can be passed directly to the
* kernel; we've done as much verification as possible in userland.
*
* Hot spares are a special case, and passed down as an array of disk vdevs, at
* the same level as the root of the vdev tree.
*
* The only function exported by this file is 'make_root_vdev'. The
* function performs several passes:
*
* 1. Construct the vdev specification. Performs syntax validation and
* makes sure each device is valid.
* 2. Check for devices in use. Using libblkid to make sure that no
* devices are also in use. Some can be overridden using the 'force'
* flag, others cannot.
* 3. Check for replication errors if the 'force' flag is not specified.
* validates that the replication level is consistent across the
* entire pool.
* 4. Call libzfs to label any whole disks with an EFI label.
*/
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <libintl.h>
#include <libnvpair.h>
#include <libzutil.h>
#include <limits.h>
#include <sys/spa.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "zpool_util.h"
#include <sys/zfs_context.h>
#include <scsi/scsi.h>
#include <scsi/sg.h>
#include <sys/efi_partition.h>
#include <sys/stat.h>
#include <sys/vtoc.h>
#include <sys/mntent.h>
#include <uuid/uuid.h>
#include <blkid/blkid.h>
typedef struct vdev_disk_db_entry
{
char id[24];
int sector_size;
} vdev_disk_db_entry_t;
/*
* Database of block devices that lie about physical sector sizes. The
* identification string must be precisely 24 characters to avoid false
* negatives
*/
static vdev_disk_db_entry_t vdev_disk_database[] = {
{"ATA ADATA SSD S396 3", 8192},
{"ATA APPLE SSD SM128E", 8192},
{"ATA APPLE SSD SM256E", 8192},
{"ATA APPLE SSD SM512E", 8192},
{"ATA APPLE SSD SM768E", 8192},
{"ATA C400-MTFDDAC064M", 8192},
{"ATA C400-MTFDDAC128M", 8192},
{"ATA C400-MTFDDAC256M", 8192},
{"ATA C400-MTFDDAC512M", 8192},
{"ATA Corsair Force 3 ", 8192},
{"ATA Corsair Force GS", 8192},
{"ATA INTEL SSDSA2CT04", 8192},
{"ATA INTEL SSDSA2BZ10", 8192},
{"ATA INTEL SSDSA2BZ20", 8192},
{"ATA INTEL SSDSA2BZ30", 8192},
{"ATA INTEL SSDSA2CW04", 8192},
{"ATA INTEL SSDSA2CW08", 8192},
{"ATA INTEL SSDSA2CW12", 8192},
{"ATA INTEL SSDSA2CW16", 8192},
{"ATA INTEL SSDSA2CW30", 8192},
{"ATA INTEL SSDSA2CW60", 8192},
{"ATA INTEL SSDSC2CT06", 8192},
{"ATA INTEL SSDSC2CT12", 8192},
{"ATA INTEL SSDSC2CT18", 8192},
{"ATA INTEL SSDSC2CT24", 8192},
{"ATA INTEL SSDSC2CW06", 8192},
{"ATA INTEL SSDSC2CW12", 8192},
{"ATA INTEL SSDSC2CW18", 8192},
{"ATA INTEL SSDSC2CW24", 8192},
{"ATA INTEL SSDSC2CW48", 8192},
{"ATA KINGSTON SH100S3", 8192},
{"ATA KINGSTON SH103S3", 8192},
{"ATA M4-CT064M4SSD2 ", 8192},
{"ATA M4-CT128M4SSD2 ", 8192},
{"ATA M4-CT256M4SSD2 ", 8192},
{"ATA M4-CT512M4SSD2 ", 8192},
{"ATA OCZ-AGILITY2 ", 8192},
{"ATA OCZ-AGILITY3 ", 8192},
{"ATA OCZ-VERTEX2 3.5 ", 8192},
{"ATA OCZ-VERTEX3 ", 8192},
{"ATA OCZ-VERTEX3 LT ", 8192},
{"ATA OCZ-VERTEX3 MI ", 8192},
{"ATA OCZ-VERTEX4 ", 8192},
{"ATA SAMSUNG MZ7WD120", 8192},
{"ATA SAMSUNG MZ7WD240", 8192},
{"ATA SAMSUNG MZ7WD480", 8192},
{"ATA SAMSUNG MZ7WD960", 8192},
{"ATA SAMSUNG SSD 830 ", 8192},
{"ATA Samsung SSD 840 ", 8192},
{"ATA SanDisk SSD U100", 8192},
{"ATA TOSHIBA THNSNH06", 8192},
{"ATA TOSHIBA THNSNH12", 8192},
{"ATA TOSHIBA THNSNH25", 8192},
{"ATA TOSHIBA THNSNH51", 8192},
{"ATA APPLE SSD TS064C", 4096},
{"ATA APPLE SSD TS128C", 4096},
{"ATA APPLE SSD TS256C", 4096},
{"ATA APPLE SSD TS512C", 4096},
{"ATA INTEL SSDSA2M040", 4096},
{"ATA INTEL SSDSA2M080", 4096},
{"ATA INTEL SSDSA2M160", 4096},
{"ATA INTEL SSDSC2MH12", 4096},
{"ATA INTEL SSDSC2MH25", 4096},
{"ATA OCZ CORE_SSD ", 4096},
{"ATA OCZ-VERTEX ", 4096},
{"ATA SAMSUNG MCCOE32G", 4096},
{"ATA SAMSUNG MCCOE64G", 4096},
{"ATA SAMSUNG SSD PM80", 4096},
/* Flash drives optimized for 4KB IOs on larger pages */
{"ATA INTEL SSDSC2BA10", 4096},
{"ATA INTEL SSDSC2BA20", 4096},
{"ATA INTEL SSDSC2BA40", 4096},
{"ATA INTEL SSDSC2BA80", 4096},
{"ATA INTEL SSDSC2BB08", 4096},
{"ATA INTEL SSDSC2BB12", 4096},
{"ATA INTEL SSDSC2BB16", 4096},
{"ATA INTEL SSDSC2BB24", 4096},
{"ATA INTEL SSDSC2BB30", 4096},
{"ATA INTEL SSDSC2BB40", 4096},
{"ATA INTEL SSDSC2BB48", 4096},
{"ATA INTEL SSDSC2BB60", 4096},
{"ATA INTEL SSDSC2BB80", 4096},
{"ATA INTEL SSDSC2BW24", 4096},
{"ATA INTEL SSDSC2BW48", 4096},
{"ATA INTEL SSDSC2BP24", 4096},
{"ATA INTEL SSDSC2BP48", 4096},
{"NA SmrtStorSDLKAE9W", 4096},
{"NVMe Amazon EC2 NVMe ", 4096},
/* Imported from Open Solaris */
{"ATA MARVELL SD88SA02", 4096},
/* Advanced format Hard drives */
{"ATA Hitachi HDS5C303", 4096},
{"ATA SAMSUNG HD204UI ", 4096},
{"ATA ST2000DL004 HD20", 4096},
{"ATA WDC WD10EARS-00M", 4096},
{"ATA WDC WD10EARS-00S", 4096},
{"ATA WDC WD10EARS-00Z", 4096},
{"ATA WDC WD15EARS-00M", 4096},
{"ATA WDC WD15EARS-00S", 4096},
{"ATA WDC WD15EARS-00Z", 4096},
{"ATA WDC WD20EARS-00M", 4096},
{"ATA WDC WD20EARS-00S", 4096},
{"ATA WDC WD20EARS-00Z", 4096},
{"ATA WDC WD1600BEVT-0", 4096},
{"ATA WDC WD2500BEVT-0", 4096},
{"ATA WDC WD3200BEVT-0", 4096},
{"ATA WDC WD5000BEVT-0", 4096},
};
#define INQ_REPLY_LEN 96
#define INQ_CMD_LEN 6
static const int vdev_disk_database_size =
sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]);
boolean_t
check_sector_size_database(char *path, int *sector_size)
{
unsigned char inq_buff[INQ_REPLY_LEN];
unsigned char sense_buffer[32];
unsigned char inq_cmd_blk[INQ_CMD_LEN] =
{INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0};
sg_io_hdr_t io_hdr;
int error;
int fd;
int i;
/* Prepare INQUIRY command */
memset(&io_hdr, 0, sizeof (sg_io_hdr_t));
io_hdr.interface_id = 'S';
io_hdr.cmd_len = sizeof (inq_cmd_blk);
io_hdr.mx_sb_len = sizeof (sense_buffer);
io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
io_hdr.dxfer_len = INQ_REPLY_LEN;
io_hdr.dxferp = inq_buff;
io_hdr.cmdp = inq_cmd_blk;
io_hdr.sbp = sense_buffer;
io_hdr.timeout = 10; /* 10 milliseconds is ample time */
if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
return (B_FALSE);
error = ioctl(fd, SG_IO, (unsigned long) &io_hdr);
(void) close(fd);
if (error < 0)
return (B_FALSE);
if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
return (B_FALSE);
for (i = 0; i < vdev_disk_database_size; i++) {
if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24))
continue;
*sector_size = vdev_disk_database[i].sector_size;
return (B_TRUE);
}
return (B_FALSE);
}
static int
check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
{
int err;
char *value;
/* No valid type detected device is safe to use */
value = blkid_get_tag_value(cache, "TYPE", path);
if (value == NULL)
return (0);
/*
* If libblkid detects a ZFS device, we check the device
* using check_file() to see if it's safe. The one safe
* case is a spare device shared between multiple pools.
*/
if (strcmp(value, "zfs_member") == 0) {
err = check_file(path, force, isspare);
} else {
if (force) {
err = 0;
} else {
err = -1;
vdev_error(gettext("%s contains a filesystem of "
"type '%s'\n"), path, value);
}
}
free(value);
return (err);
}
/*
* Validate that a disk including all partitions are safe to use.
*
* For EFI labeled disks this can done relatively easily with the libefi
* library. The partition numbers are extracted from the label and used
* to generate the expected /dev/ paths. Each partition can then be
* checked for conflicts.
*
* For non-EFI labeled disks (MBR/EBR/etc) the same process is possible
* but due to the lack of a readily available libraries this scanning is
* not implemented. Instead only the device path as given is checked.
*/
static int
check_disk(const char *path, blkid_cache cache, int force,
boolean_t isspare, boolean_t iswholedisk)
{
struct dk_gpt *vtoc;
char slice_path[MAXPATHLEN];
int err = 0;
int fd, i;
int flags = O_RDONLY|O_DIRECT;
if (!iswholedisk)
return (check_slice(path, cache, force, isspare));
/* only spares can be shared, other devices require exclusive access */
if (!isspare)
flags |= O_EXCL;
if ((fd = open(path, flags)) < 0) {
char *value = blkid_get_tag_value(cache, "TYPE", path);
(void) fprintf(stderr, gettext("%s is in use and contains "
"a %s filesystem.\n"), path, value ? value : "unknown");
free(value);
return (-1);
}
/*
* Expected to fail for non-EFI labeled disks. Just check the device
* as given and do not attempt to detect and scan partitions.
*/
err = efi_alloc_and_read(fd, &vtoc);
if (err) {
(void) close(fd);
return (check_slice(path, cache, force, isspare));
}
/*
* The primary efi partition label is damaged however the secondary
* label at the end of the device is intact. Rather than use this
* label we should play it safe and treat this as a non efi device.
*/
if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
efi_free(vtoc);
(void) close(fd);
if (force) {
/* Partitions will now be created using the backup */
return (0);
} else {
vdev_error(gettext("%s contains a corrupt primary "
"EFI label.\n"), path);
return (-1);
}
}
for (i = 0; i < vtoc->efi_nparts; i++) {
if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
continue;
if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
(void) snprintf(slice_path, sizeof (slice_path),
"%s%s%d", path, "-part", i+1);
else
(void) snprintf(slice_path, sizeof (slice_path),
"%s%s%d", path, isdigit(path[strlen(path)-1]) ?
"p" : "", i+1);
err = check_slice(slice_path, cache, force, isspare);
if (err)
break;
}
efi_free(vtoc);
(void) close(fd);
return (err);
}
int
check_device(const char *path, boolean_t force,
boolean_t isspare, boolean_t iswholedisk)
{
blkid_cache cache;
int error;
error = blkid_get_cache(&cache, NULL);
if (error != 0) {
(void) fprintf(stderr, gettext("unable to access the blkid "
"cache.\n"));
return (-1);
}
error = check_disk(path, cache, force, isspare, iswholedisk);
blkid_put_cache(cache);
return (error);
}
void
after_zpool_upgrade(zpool_handle_t *zhp)
{
}