2014-01-22 01:30:03 +04:00
|
|
|
/*
|
2020-10-09 06:10:13 +03:00
|
|
|
* This file is part of the ZFS Event Daemon (ZED).
|
|
|
|
*
|
2014-01-22 01:30:03 +04:00
|
|
|
* Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
|
|
|
|
* Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
|
2015-05-07 01:56:03 +03:00
|
|
|
* Refer to the ZoL git commit log for authoritative copyright attribution.
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the terms of the
|
|
|
|
* Common Development and Distribution License Version 1.0 (CDDL-1.0).
|
|
|
|
* You can obtain a copy of the license from the top-level file
|
|
|
|
* "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
|
|
|
|
* You may not use this file except in compliance with the license.
|
2014-01-22 01:30:03 +04:00
|
|
|
*/
|
|
|
|
|
zed: implement close_from() in terms of /proc/self/fd, if available
/dev/fd on Darwin
Consider the following strace output:
prlimit64(0, RLIMIT_NOFILE, NULL, {rlim_cur=1024, rlim_max=1024*1024}) = 0
Yes, that is well over a million file descriptors!
This reduces the ZED start-up time from "at least a second" to
"instantaneous", and, under strace, from "don't even try" to "usable"
by simple virtue of doing five syscalls instead of over a million;
in most cases the main loop does nothing
Recent Linuxes (5.8+) have close_range(2) for this, but that's an
overoptimisation (and libcs don't have wrappers for it yet)
This is also run by the ZEDLET pre-exec. Compare:
Finished "all-syslog.sh" eid=13 pid=6717 time=1.027100s exit=0
Finished "history_event-zfs-list-cacher.sh" eid=13 pid=6718 time=1.046923s exit=0
to
Finished "all-syslog.sh" eid=12 pid=4834 time=0.001836s exit=0
Finished "history_event-zfs-list-cacher.sh" eid=12 pid=4835 time=0.001346s exit=0
lol
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Closes #11834
2021-04-02 16:10:34 +03:00
|
|
|
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "zed_file.h"
#include "zed_log.h"
|
|
|
|
|
|
|
|
/*
 * Attempt to take an exclusive (write) advisory lock on the open file
 * descriptor [fd].  The request is issued with F_SETLK, so the call does
 * not wait for a conflicting lock to be released.
 *
 * Return 0 if the lock was acquired, 1 if the request was refused because
 * of a conflicting lock (errno is EACCES or EAGAIN), or -1 on any other
 * error (with errno set).  A negative [fd] yields -1 with errno = EBADF.
 */
int
zed_file_lock(int fd)
{
	struct flock request;

	if (fd < 0) {
		errno = EBADF;
		return (-1);
	}

	/* Offset 0 from SEEK_SET with length 0 describes the whole file. */
	request.l_whence = SEEK_SET;
	request.l_start = 0;
	request.l_len = 0;
	request.l_type = F_WRLCK;

	if (fcntl(fd, F_SETLK, &request) >= 0)
		return (0);

	/* Tell "somebody else holds it" apart from a genuine failure. */
	return ((errno == EAGAIN || errno == EACCES) ? 1 : -1);
}
|
|
|
|
|
|
|
|
/*
 * Drop the advisory lock held on the open file descriptor [fd] by issuing
 * an F_UNLCK request that covers the whole file.
 *
 * Return 0 on success, or -1 on error (with errno set).  A negative [fd]
 * yields -1 with errno = EBADF.
 */
int
zed_file_unlock(int fd)
{
	struct flock request;

	if (fd < 0) {
		errno = EBADF;
		return (-1);
	}

	/* Offset 0 from SEEK_SET with length 0 describes the whole file. */
	request.l_whence = SEEK_SET;
	request.l_start = 0;
	request.l_len = 0;
	request.l_type = F_UNLCK;

	return ((fcntl(fd, F_SETLK, &request) < 0) ? -1 : 0);
}
|
|
|
|
|
|
|
|
/*
 * Probe (via F_GETLK) whether an exclusive advisory lock covering the whole
 * file could be placed on the open file descriptor [fd].  No lock is taken.
 *
 * Return 0 if nothing would block the lock, the l_pid reported by F_GETLK
 * (the PID of the process holding a conflicting lock) otherwise, or -1 on
 * error (with errno set).  A negative [fd] yields -1 with errno = EBADF.
 */
pid_t
zed_file_is_locked(int fd)
{
	struct flock probe;

	if (fd < 0) {
		errno = EBADF;
		return (-1);
	}

	/* Describe the lock we would like: write lock over the whole file. */
	probe.l_whence = SEEK_SET;
	probe.l_start = 0;
	probe.l_len = 0;
	probe.l_type = F_WRLCK;

	if (fcntl(fd, F_GETLK, &probe) < 0)
		return (-1);

	/* F_GETLK rewrites l_type to F_UNLCK when there is no conflict. */
	return ((probe.l_type == F_UNLCK) ? 0 : probe.l_pid);
}
|
|
|
|
|
zed: implement close_from() in terms of /proc/self/fd, if available
/dev/fd on Darwin
Consider the following strace output:
prlimit64(0, RLIMIT_NOFILE, NULL, {rlim_cur=1024, rlim_max=1024*1024}) = 0
Yes, that is well over a million file descriptors!
This reduces the ZED start-up time from "at least a second" to
"instantaneous", and, under strace, from "don't even try" to "usable"
by simple virtue of doing five syscalls instead of over a million;
in most cases the main loop does nothing
Recent Linuxes (5.8+) have close_range(2) for this, but that's an
overoptimisation (and libcs don't have wrappers for it yet)
This is also run by the ZEDLET pre-exec. Compare:
Finished "all-syslog.sh" eid=13 pid=6717 time=1.027100s exit=0
Finished "history_event-zfs-list-cacher.sh" eid=13 pid=6718 time=1.046923s exit=0
to
Finished "all-syslog.sh" eid=12 pid=4834 time=0.001836s exit=0
Finished "history_event-zfs-list-cacher.sh" eid=12 pid=4835 time=0.001346s exit=0
lol
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Closes #11834
2021-04-02 16:10:34 +03:00
|
|
|
|
|
|
|
#if defined(__APPLE__)
#define	PROC_SELF_FD "/dev/fd"
#else /* Linux-compatible layout */
#define	PROC_SELF_FD "/proc/self/fd"
#endif

/*
 * Close all open file descriptors greater than or equal to [lowfd].
 * Any errors encountered while closing file descriptors are ignored,
 * and the value errno had on entry is restored before returning.
 * A negative [lowfd] is treated as 0.
 *
 * The range to close is found by listing PROC_SELF_FD; if that directory
 * cannot be opened, every descriptor below sysconf(_SC_OPEN_MAX) is tried.
 */
void
zed_file_close_from(int lowfd)
{
	int errno_bak = errno;
	int endfd = 0;		/* one past the highest fd to close */
	int fd;
	DIR *fddir;
	struct dirent *fdent;

	if (lowfd < 0)
		lowfd = 0;

	if ((fddir = opendir(PROC_SELF_FD)) != NULL) {
		/* The listing includes the fd used to read it; skip that. */
		int selffd = dirfd(fddir);

		while ((fdent = readdir(fddir)) != NULL) {
			char *end;
			long val;

			/* Ignore ".", ".." and anything else non-numeric. */
			errno = 0;
			val = strtol(fdent->d_name, &end, 10);
			if (end == fdent->d_name || *end != '\0' ||
			    errno != 0 || val < 0 || val >= INT_MAX)
				continue;

			fd = (int)val;
			/*
			 * Track an exclusive bound so that the highest
			 * open descriptor is itself closed below.
			 */
			if (fd != selffd && fd >= endfd)
				endfd = fd + 1;
		}
		(void) closedir(fddir);
	} else {
		/* sysconf() returns -1 if the limit is indeterminate. */
		long openmax = sysconf(_SC_OPEN_MAX);

		if (openmax > INT_MAX)
			endfd = INT_MAX;
		else if (openmax > 0)
			endfd = (int)openmax;
	}

	for (fd = lowfd; fd < endfd; fd++)
		(void) close(fd);

	errno = errno_bak;
}
|