diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index ed79ede7c..661f0f1b7 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -38,9 +38,21 @@ #include #include +/* + * Unique identifier for the exclusive vdev holder. + */ static void *zfs_vdev_holder = VDEV_HOLDER; -/* size of the "reserved" partition, in blocks */ +/* + * Wait up to zfs_vdev_open_timeout_ms milliseconds before determining the + * device is missing. The missing path may be transient since the links + * can be briefly removed and recreated in response to udev events. + */ +static unsigned zfs_vdev_open_timeout_ms = 1000; + +/* + * Size of the "reserved" partition, in blocks. + */ #define EFI_MIN_RESV_SIZE (16 * 1024) /* @@ -165,8 +177,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, { struct block_device *bdev; fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa)); - int count = 0, block_size; - int bdev_retry_count = 50; + hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms); vdev_disk_t *vd; /* Must have a pathname and it must be absolute. */ @@ -181,7 +192,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, * partition force re-scanning the partition table while closed * in order to get an accurate updated block device size. Then * since udev may need to recreate the device links increase the - * open retry count before reporting the device as unavailable. + * open retry timeout before reporting the device as unavailable. */ vd = v->vdev_tsd; if (vd) { @@ -206,8 +217,10 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, if (!IS_ERR(bdev)) { int error = vdev_bdev_reread_part(bdev); vdev_bdev_close(bdev, mode); - if (error == 0) - bdev_retry_count = 100; + if (error == 0) { + timeout = MSEC2NSEC( + zfs_vdev_open_timeout_ms * 2); + } } } } else { @@ -240,12 +253,12 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, * and it is reasonable to sleep and retry before giving up. In * practice delays have been observed to be on the order of 100ms. */ + hrtime_t start = gethrtime(); bdev = ERR_PTR(-ENXIO); - while (IS_ERR(bdev) && count < bdev_retry_count) { + while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) { bdev = vdev_bdev_open(v->vdev_path, mode, zfs_vdev_holder); if (unlikely(PTR_ERR(bdev) == -ENOENT)) { schedule_timeout(MSEC_TO_TICK(10)); - count++; } else if (IS_ERR(bdev)) { break; } @@ -253,7 +266,9 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, if (IS_ERR(bdev)) { int error = -PTR_ERR(bdev); - vdev_dbgmsg(v, "open error=%d count=%d", error, count); + vdev_dbgmsg(v, "open error=%d timeout=%llu/%llu", error, + (u_longlong_t)(gethrtime() - start), + (u_longlong_t)timeout); vd->vd_bdev = NULL; v->vdev_tsd = vd; rw_exit(&vd->vd_lock); @@ -267,7 +282,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, struct request_queue *q = bdev_get_queue(vd->vd_bdev); /* Determine the physical block size */ - block_size = vdev_bdev_block_size(vd->vd_bdev); + int block_size = vdev_bdev_block_size(vd->vd_bdev); /* Clear the nowritecache bit, causes vdev_reopen() to try again. */ v->vdev_nowritecache = B_FALSE;