ZTS: Fix zpool_reopen_001_pos

Update the vdev_disk_open() retry logic to wait for a specified
number of milliseconds rather than a fixed number of attempts, which
is more robust.  Additionally, on failure log both the time waited
and the requested timeout to the internal debug log.

The default maximum allowed open retry time has been increased
from 500ms to 1000ms.

Reviewed-by: Kjeld Schouten <kjeld@schouten-lebbing.nl>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #9680
Authored by Brian Behlendorf on 2019-12-09 11:09:14 -08:00; committed by Tony Hutter
commit 2525b71c68 (parent 85ff6a23f4)
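
For context, the patch replaces a fixed retry count (50 attempts, or 100 after a successful partition rescan, at 10ms per sleep) with a wall-clock deadline. Below is a minimal user-space C sketch of that retry-until-deadline pattern; it uses POSIX open()/clock_gettime()/usleep() in place of the kernel-side vdev_bdev_open()/gethrtime()/schedule_timeout() interfaces the patch actually touches, and open_with_timeout() and now_ns() are illustrative names, not ZFS functions.

    #include <errno.h>
    #include <fcntl.h>
    #include <stdint.h>
    #include <time.h>
    #include <unistd.h>

    /* Monotonic clock in nanoseconds (stand-in for gethrtime()). */
    static int64_t now_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ((int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
    }

    /*
     * Retry a transiently missing device node until timeout_ms has elapsed:
     * sleep briefly on ENOENT (udev may recreate the link shortly), bail
     * out immediately on any other error.  Returns an open fd or -1.
     */
    static int open_with_timeout(const char *path, unsigned timeout_ms)
    {
        int64_t start = now_ns();
        int64_t timeout = (int64_t)timeout_ms * 1000000LL;
        int fd = -1;

        while (fd < 0 && (now_ns() - start) < timeout) {
            fd = open(path, O_RDONLY);
            if (fd < 0 && errno == ENOENT)
                usleep(10 * 1000);      /* wait 10ms and retry */
            else if (fd < 0)
                break;                  /* hard failure, give up */
        }

        return (fd);
    }

The design point mirrors the commit message: a deadline tolerates variable udev latency, since the device is only reported unavailable after the allowed time has actually elapsed, regardless of how long each individual failed open attempt takes.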


@@ -38,9 +38,21 @@
 #include <linux/msdos_fs.h>
 #include <linux/vfs_compat.h>
 
 /*
  * Unique identifier for the exclusive vdev holder.
  */
 static void *zfs_vdev_holder = VDEV_HOLDER;
 
-/* size of the "reserved" partition, in blocks */
+/*
+ * Wait up to zfs_vdev_open_timeout_ms milliseconds before determining the
+ * device is missing. The missing path may be transient since the links
+ * can be briefly removed and recreated in response to udev events.
+ */
+static unsigned zfs_vdev_open_timeout_ms = 1000;
+
+/*
+ * Size of the "reserved" partition, in blocks.
+ */
 #define EFI_MIN_RESV_SIZE (16 * 1024)
 
 /*
@@ -165,8 +177,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
 {
     struct block_device *bdev;
     fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
-    int count = 0, block_size;
-    int bdev_retry_count = 50;
+    hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms);
     vdev_disk_t *vd;
 
     /* Must have a pathname and it must be absolute. */
@@ -181,7 +192,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
      * partition force re-scanning the partition table while closed
      * in order to get an accurate updated block device size. Then
      * since udev may need to recreate the device links increase the
-     * open retry count before reporting the device as unavailable.
+     * open retry timeout before reporting the device as unavailable.
      */
     vd = v->vdev_tsd;
     if (vd) {
@@ -206,8 +217,10 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
             if (!IS_ERR(bdev)) {
                 int error = vdev_bdev_reread_part(bdev);
                 vdev_bdev_close(bdev, mode);
-                if (error == 0)
-                    bdev_retry_count = 100;
+                if (error == 0) {
+                    timeout = MSEC2NSEC(
+                        zfs_vdev_open_timeout_ms * 2);
+                }
             }
         }
     } else {
@@ -240,12 +253,12 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
      * and it is reasonable to sleep and retry before giving up. In
      * practice delays have been observed to be on the order of 100ms.
      */
+    hrtime_t start = gethrtime();
     bdev = ERR_PTR(-ENXIO);
-    while (IS_ERR(bdev) && count < bdev_retry_count) {
+    while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) {
         bdev = vdev_bdev_open(v->vdev_path, mode, zfs_vdev_holder);
         if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
             schedule_timeout(MSEC_TO_TICK(10));
-            count++;
         } else if (IS_ERR(bdev)) {
             break;
         }
@@ -253,7 +266,9 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
     if (IS_ERR(bdev)) {
         int error = -PTR_ERR(bdev);
-        vdev_dbgmsg(v, "open error=%d count=%d", error, count);
+        vdev_dbgmsg(v, "open error=%d timeout=%llu/%llu", error,
+            (u_longlong_t)(gethrtime() - start),
+            (u_longlong_t)timeout);
         vd->vd_bdev = NULL;
         v->vdev_tsd = vd;
         rw_exit(&vd->vd_lock);
@@ -267,7 +282,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
     struct request_queue *q = bdev_get_queue(vd->vd_bdev);
 
     /* Determine the physical block size */
-    block_size = vdev_bdev_block_size(vd->vd_bdev);
+    int block_size = vdev_bdev_block_size(vd->vd_bdev);
 
     /* Clear the nowritecache bit, causes vdev_reopen() to try again. */
     v->vdev_nowritecache = B_FALSE;