Wait in libzfs_init() for the /dev/zfs device

While module loading itself is synchronous the creation of the /dev/zfs
device is not.  This is because /dev/zfs is typically created by a udev
rule after the module is registered and presented to user space through
sysfs.  This small window between module loading and device creation
can result in spurious failures of libzfs_init().

This patch closes that race by extending libzfs_init() so it can detect
that the modules are loaded and only if required wait for the /dev/zfs
device to be created.  This allows scripts to reliably use the following
shell construct without the need for additional error handling.

$ /sbin/modprobe zfs && /sbin/zpool import -a

To minimize the potential time waiting in libzfs_init() a strategy
similar to adaptive mutexes is employed.  The function will busy-wait
for up to 10ms based on the expectation that the modules were just
loaded and therefore the /dev/zfs will be created imminently.  If it
takes longer than this it will fall back to polling for up to 10 seconds.

This behavior can be customized to some degree by setting the following
new environment variables.  This functionality is provided for backwards
compatibility with existing scripts which depend on the module auto-load
behavior.  By default module auto-loading is now disabled.

* ZFS_MODULE_LOADING="YES|yes|ON|on" - Attempt to load modules.
* ZFS_MODULE_TIMEOUT="<seconds>"     - Seconds to wait for /dev/zfs

The zfs-import-* systemd service files have been updated to call
'/sbin/modprobe zfs' so they no longer rely on the legacy auto-loading
behavior.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Chris Dunlap <cdunlap@llnl.gov>
Signed-off-by: Richard Yao <ryao@gentoo.org>
Closes #2556
This commit is contained in:
Brian Behlendorf 2015-05-20 14:36:37 -07:00
parent c11cd7f934
commit 87abfcba22
3 changed files with 67 additions and 6 deletions

View File

@ -9,4 +9,4 @@ ConditionPathExists=@sysconfdir@/zfs/zpool.cache
[Service] [Service]
Type=oneshot Type=oneshot
RemainAfterExit=yes RemainAfterExit=yes
ExecStart=@sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN ExecStart=/sbin/modprobe zfs && @sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN

View File

@ -9,4 +9,4 @@ ConditionPathExists=!@sysconfdir@/zfs/zpool.cache
[Service] [Service]
Type=oneshot Type=oneshot
RemainAfterExit=yes RemainAfterExit=yes
ExecStart=@sbindir@/zpool import -d /dev/disk/by-id -aN ExecStart=/sbin/modprobe zfs && @sbindir@/zpool import -d /dev/disk/by-id -aN

View File

@ -661,23 +661,84 @@ libzfs_run_process(const char *path, char *argv[], int flags)
return (-1); return (-1);
} }
/*
* Verify the required ZFS_DEV device is available and optionally attempt
* to load the ZFS modules. Under normal circumstances the modules
* should already have been loaded by some external mechanism.
*
* Environment variables:
* - ZFS_MODULE_LOADING="YES|yes|ON|on" - Attempt to load modules.
* - ZFS_MODULE_TIMEOUT="<seconds>" - Seconds to wait for ZFS_DEV
*/
int int
libzfs_load_module(const char *module) libzfs_load_module(const char *module)
{ {
char *argv[4] = {"/sbin/modprobe", "-q", (char *)module, (char *)0}; char *argv[4] = {"/sbin/modprobe", "-q", (char *)module, (char *)0};
char *load_str, *timeout_str;
long timeout = 10; /* seconds */
long busy_timeout = 10; /* milliseconds */
int load = 0, fd;
hrtime_t start;
if (libzfs_module_loaded(module)) /* Optionally request module loading */
return (0); if (!libzfs_module_loaded(module)) {
load_str = getenv("ZFS_MODULE_LOADING");
if (load_str) {
if (!strncasecmp(load_str, "YES", strlen("YES")) ||
!strncasecmp(load_str, "ON", strlen("ON")))
load = 1;
else
load = 0;
}
return (libzfs_run_process("/sbin/modprobe", argv, 0)); if (load && libzfs_run_process("/sbin/modprobe", argv, 0))
return (ENOEXEC);
}
/* Module loading is synchronous it must be available */
if (!libzfs_module_loaded(module))
return (ENXIO);
/*
* Device creation by udev is asynchronous and waiting may be
* required. Busy wait for 10ms and then fall back to polling every
* 10ms for the allowed timeout (default 10s, max 10m). This is
* done to optimize for the common case where the device is
* immediately available and to avoid penalizing the possible
* case where udev is slow or unable to create the device.
*/
timeout_str = getenv("ZFS_MODULE_TIMEOUT");
if (timeout_str) {
timeout = strtol(timeout_str, NULL, 0);
timeout = MAX(MIN(timeout, (10 * 60)), 0); /* 0 <= N <= 600 */
}
start = gethrtime();
do {
fd = open(ZFS_DEV, O_RDWR);
if (fd >= 0) {
(void) close(fd);
return (0);
} else if (errno != ENOENT) {
return (errno);
} else if (NSEC2MSEC(gethrtime() - start) < busy_timeout) {
sched_yield();
} else {
usleep(10 * MILLISEC);
}
} while (NSEC2MSEC(gethrtime() - start) < (timeout * MILLISEC));
return (ENOENT);
} }
libzfs_handle_t * libzfs_handle_t *
libzfs_init(void) libzfs_init(void)
{ {
libzfs_handle_t *hdl; libzfs_handle_t *hdl;
int error;
if (libzfs_load_module("zfs") != 0) { error = libzfs_load_module(ZFS_DRIVER);
if (error) {
(void) fprintf(stderr, gettext("Failed to load ZFS module " (void) fprintf(stderr, gettext("Failed to load ZFS module "
"stack.\nLoad the module manually by running " "stack.\nLoad the module manually by running "
"'insmod <location>/zfs.ko' as root.\n")); "'insmod <location>/zfs.ko' as root.\n"));