mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-11-18 10:21:01 +03:00
0382362ce0
A plain rewrite of the shell version, and generates identical units, save for replacing some empty lines with nothing, having fewer meaningless spaces in After=s and different spacing in the lock scripts, for a clean git diff -w This is a gain of anywhere from 0m0.336s vs 0m0.022s (15.27x) to 0m0.202s vs 0m0.006s (33.67x), depending on the hardware, a.k.a. from "absolutely unusable" to "perfectly fine" This also properly deals with canmount=noauto units across multiple pools See PR for detailed timings (of an early version) and diffs Reviewed-by: Antonio Russo <aerusso@aerusso.net> Reviewed-by: Richard Laager <rlaager@wiktel.com> Reviewed-by: InsanePrawn <insane.prawny@gmail.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz> Issue #11915 Closes #11917
1090 lines
30 KiB
C
1090 lines
30 KiB
C
/*
|
|
* Copyright (c) 2017 Antonio Russo <antonio.e.russo@gmail.com>
|
|
* Copyright (c) 2020 InsanePrawn <insane.prawny@gmail.com>
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
|
|
#include <sys/resource.h>
|
|
#include <sys/types.h>
|
|
#include <sys/time.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/mman.h>
|
|
#include <semaphore.h>
|
|
#include <stdbool.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <stdio.h>
|
|
#include <time.h>
|
|
#include <regex.h>
|
|
#include <search.h>
|
|
#include <dirent.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <limits.h>
|
|
#include <errno.h>
|
|
#include <libzfs.h>
|
|
|
|
/*
 * Comparator adaptors: cast the typed comparison functions to the
 * (const void *, const void *) shape expected by tsearch(3), qsort(3)
 * and bsearch(3).
 */
#define	STRCMP ((int(*)(const void *, const void *))&strcmp)
#define	PID_T_CMP ((int(*)(const void *, const void *))&pid_t_cmp)

/*
 * Three-way comparison of two process ids.
 *
 * Returns a negative value, zero or a positive value when *lhs is
 * respectively smaller than, equal to or greater than *rhs.
 *
 * sys_types.h(7posix) guarantees pid_t is a signed integer type, so the
 * relational operators are always valid. The result is built from two
 * comparisons rather than a subtraction so that it can neither overflow
 * nor be truncated, whatever the operands are.
 */
static int
pid_t_cmp(const pid_t *lhs, const pid_t *rhs)
{
	const pid_t l = *lhs;
	const pid_t r = *rhs;

	return ((l > r) - (l < r));
}
|
|
|
|
/* Program name; prefixes every diagnostic and is named in generated units */
#define	PROGNAME	"zfs-mount-generator"
/* Directory of dataset list cache files, below the build-time SYSCONFDIR */
#define	FSLIST		SYSCONFDIR "/zfs/zfs-list.cache"
/* Path of the zfs(8) binary, below the build-time SBINDIR */
#define	ZFS		SBINDIR "/zfs"

/* First lines written to every generated unit file */
#define	OUTPUT_HEADER \
	"# Automatically generated by " PROGNAME "\n" \
	"\n"

/*
 * Report an allocation failure, tagged with our pid and the source line
 * of the call site, then terminate immediately with status 1.
 */
#define	EXIT_ENOMEM() \
	do { \
		fprintf(stderr, PROGNAME "[%d]: " \
		    "not enough memory (L%d)!\n", getpid(), __LINE__); \
		_exit(1); \
	} while (0)
|
|
|
|
/*
 * Starts like the one in libzfs_util.c but also matches "//"
 * and captures until the end, since we actually use it for path extraction.
 *
 * Group 1 is the URI scheme, group 2 is everything after "://".
 */
#define	URI_REGEX_S "^\\([A-Za-z][A-Za-z0-9+.\\-]*\\):\\/\\/\\(.*\\)$"

/* Compiled form of URI_REGEX_S */
static regex_t uri_regex;

/* Start of the program's argv[0] storage */
static char *argv0;

/* Directory the units are generated into, and a descriptor open on it */
static const char *destdir = "/tmp";
static int destdir_fd = -1;

/* tsearch() tree of C strings: names of the known pools */
static void *known_pools = NULL;
|
|
static struct {
|
|
sem_t noauto_not_on_sem;
|
|
|
|
sem_t noauto_names_sem;
|
|
size_t noauto_names_len;
|
|
size_t noauto_names_max;
|
|
char noauto_names[][NAME_MAX];
|
|
} *noauto_files;
|
|
|
|
|
|
/*
 * Escape input the way systemd escapes strings for use in unit names,
 * and wrap the result in prepend and append.
 *
 *  - '/' becomes '-'
 *  - ASCII alphanumerics, ':', '_' and '.' are copied verbatim,
 *    except for a leading '.', which is escaped
 *  - every other byte becomes "\xNN", NN being its value as two
 *    lower-case hexadecimal digits
 *
 * Returns a malloc()ed string the caller owns.
 * Allocation failure terminates the process through EXIT_ENOMEM().
 */
static char *
systemd_escape(const char *input, const char *prepend, const char *append)
{
	static const char hexdigits[] = "0123456789abcdef";

	size_t len = strlen(input);
	size_t applen = strlen(append);
	size_t prelen = strlen(prepend);
	/* Worst case: every input byte expands to the 4 bytes of "\xNN" */
	char *ret = malloc(4 * len + prelen + applen + 1);
	if (!ret)
		EXIT_ENOMEM();

	memcpy(ret, prepend, prelen);
	char *out = ret + prelen;

	const char *cur = input;
	if (*cur == '.') {
		memcpy(out, "\\x2e", 4);
		out += 4;
		++cur;
	}
	for (; *cur; ++cur) {
		if (*cur == '/')
			*(out++) = '-';
		else if (strchr(
		    "0123456789"
		    "abcdefghijklmnopqrstuvwxyz"
		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		    ":_.", *cur))
			*(out++) = *cur;
		else {
			/*
			 * Go through unsigned char: where char is signed,
			 * a byte >= 0x80 is negative and formatting it as
			 * an int yields eight hex digits instead of two,
			 * corrupting the name and overrunning the
			 * four bytes budgeted for it.
			 */
			const unsigned char byte = (unsigned char)*cur;

			*(out++) = '\\';
			*(out++) = 'x';
			*(out++) = hexdigits[byte >> 4];
			*(out++) = hexdigits[byte & 0x0f];
		}
	}

	/* applen + 1 copies append's terminator as well */
	memcpy(out, append, applen + 1);
	return (ret);
}
|
|
|
|
/*
 * Collapse every run of consecutive '/' in path into a single '/'.
 * The string is rewritten in place and can only get shorter.
 */
static void
simplify_path(char *path)
{
	const char *src = path;
	char *dst = path;

	while (*src != '\0') {
		const char ch = *src++;

		*dst++ = ch;
		if (ch == '/') {
			/* Drop the rest of this run of slashes */
			while (*src == '/')
				++src;
		}
	}

	*dst = '\0';
}
|
|
|
|
/*
 * Returns true if the string what ends with the string suff.
 * An empty suff matches every string.
 */
static bool
strendswith(const char *what, const char *suff)
{
	const size_t whole = strlen(what);
	const size_t tail = strlen(suff);

	if (tail > whole)
		return (false);

	return (memcmp(what + (whole - tail), suff, tail) == 0);
}
|
|
|
|
/* Assumes already-simplified path, doesn't modify input */
|
|
/*
 * Path flavour of systemd_escape(): escapes input, wrapped in prepend
 * and append, after discarding one leading and one trailing '/'.
 * The path "/" escapes to "-".
 *
 * Assumes an already-simplified path. A trailing '/' is blanked while
 * escaping and put back before returning, so input is unchanged on return.
 *
 * Returns a malloc()ed string, or NULL if the path has a "." or ".."
 * component.
 */
static char *
systemd_escape_path(char *input, const char *prepend, const char *append)
{
	char *escaped;

	if (strcmp(input, "/") == 0) {
		if (asprintf(&escaped, "%s-%s", prepend, append) == -1)
			EXIT_ENOMEM();
		return (escaped);
	}

	/*
	 * path_is_normalized() (flattened for absolute paths here),
	 * required for proper escaping
	 */
	const bool has_dot_component =
	    strstr(input, "/./") != NULL || strstr(input, "/../") != NULL ||
	    strendswith(input, "/.") || strendswith(input, "/..");
	if (has_dot_component)
		return (NULL);

	char *body = (input[0] == '/') ? input + 1 : input;

	char *last = body + strlen(body) - 1;
	const bool trailing_slash = (*last == '/');

	if (trailing_slash)
		*last = '\0';
	escaped = systemd_escape(body, prepend, append);
	if (trailing_slash)
		*last = '/';

	return (escaped);
}
|
|
|
|
/*
 * openat(2) followed by fdopen(3): open pathname relative to dirfd with
 * the given flags and creation mode, and wrap the descriptor in a stdio
 * stream opened with stream_mode.
 *
 * Returns the stream, or NULL with errno set by whichever call failed.
 * If fdopen() fails the descriptor is closed again instead of leaked.
 */
static FILE *
fopenat(int dirfd, const char *pathname, int flags,
    const char *stream_mode, mode_t mode)
{
	int fd = openat(dirfd, pathname, flags, mode);
	if (fd < 0)
		return (NULL);

	FILE *stream = fdopen(fd, stream_mode);
	if (stream == NULL) {
		/* Callers report errno, so don't let close() clobber it */
		const int saved_errno = errno;

		(void) close(fd);
		errno = saved_errno;
	}

	return (stream);
}
|
|
|
|
static int
|
|
line_worker(char *line, const char *cachefile)
|
|
{
|
|
char *toktmp;
|
|
/* BEGIN CSTYLED */
|
|
const char *dataset = strtok_r(line, "\t", &toktmp);
|
|
char *p_mountpoint = strtok_r(NULL, "\t", &toktmp);
|
|
const char *p_canmount = strtok_r(NULL, "\t", &toktmp);
|
|
const char *p_atime = strtok_r(NULL, "\t", &toktmp);
|
|
const char *p_relatime = strtok_r(NULL, "\t", &toktmp);
|
|
const char *p_devices = strtok_r(NULL, "\t", &toktmp);
|
|
const char *p_exec = strtok_r(NULL, "\t", &toktmp);
|
|
const char *p_readonly = strtok_r(NULL, "\t", &toktmp);
|
|
const char *p_setuid = strtok_r(NULL, "\t", &toktmp);
|
|
const char *p_nbmand = strtok_r(NULL, "\t", &toktmp);
|
|
const char *p_encroot = strtok_r(NULL, "\t", &toktmp) ?: "-";
|
|
char *p_keyloc = strtok_r(NULL, "\t", &toktmp) ?: strdupa("none");
|
|
const char *p_systemd_requires = strtok_r(NULL, "\t", &toktmp) ?: "-";
|
|
const char *p_systemd_requiresmountsfor = strtok_r(NULL, "\t", &toktmp) ?: "-";
|
|
const char *p_systemd_before = strtok_r(NULL, "\t", &toktmp) ?: "-";
|
|
const char *p_systemd_after = strtok_r(NULL, "\t", &toktmp) ?: "-";
|
|
char *p_systemd_wantedby = strtok_r(NULL, "\t", &toktmp) ?: strdupa("-");
|
|
char *p_systemd_requiredby = strtok_r(NULL, "\t", &toktmp) ?: strdupa("-");
|
|
const char *p_systemd_nofail = strtok_r(NULL, "\t", &toktmp) ?: "-";
|
|
const char *p_systemd_ignore = strtok_r(NULL, "\t", &toktmp) ?: "-";
|
|
/* END CSTYLED */
|
|
|
|
const char *pool = dataset;
|
|
if ((toktmp = strchr(pool, '/')) != NULL)
|
|
pool = strndupa(pool, toktmp - pool);
|
|
|
|
if (p_nbmand == NULL) {
|
|
fprintf(stderr, PROGNAME "[%d]: %s: not enough tokens!\n",
|
|
getpid(), dataset);
|
|
return (1);
|
|
}
|
|
|
|
strncpy(argv0, dataset, strlen(argv0));
|
|
|
|
/* Minimal pre-requisites to mount a ZFS dataset */
|
|
const char *after = "zfs-import.target";
|
|
const char *wants = "zfs-import.target";
|
|
const char *bindsto = NULL;
|
|
char *wantedby = NULL;
|
|
char *requiredby = NULL;
|
|
bool noauto = false;
|
|
bool wantedby_append = true;
|
|
|
|
/*
|
|
* zfs-import.target is not needed if the pool is already imported.
|
|
* This avoids a dependency loop on root-on-ZFS systems:
|
|
* systemd-random-seed.service After (via RequiresMountsFor)
|
|
* var-lib.mount After
|
|
* zfs-import.target After
|
|
* zfs-import-{cache,scan}.service After
|
|
* cryptsetup.service After
|
|
* systemd-random-seed.service
|
|
*/
|
|
if (tfind(pool, &known_pools, STRCMP)) {
|
|
after = "";
|
|
wants = "";
|
|
}
|
|
|
|
if (strcmp(p_systemd_after, "-") == 0)
|
|
p_systemd_after = NULL;
|
|
if (strcmp(p_systemd_before, "-") == 0)
|
|
p_systemd_before = NULL;
|
|
if (strcmp(p_systemd_requires, "-") == 0)
|
|
p_systemd_requires = NULL;
|
|
if (strcmp(p_systemd_requiresmountsfor, "-") == 0)
|
|
p_systemd_requiresmountsfor = NULL;
|
|
|
|
|
|
if (strcmp(p_encroot, "-") != 0) {
|
|
char *keyloadunit =
|
|
systemd_escape(p_encroot, "zfs-load-key-", ".service");
|
|
|
|
if (strcmp(dataset, p_encroot) == 0) {
|
|
const char *keymountdep = NULL;
|
|
bool is_prompt = false;
|
|
|
|
regmatch_t uri_matches[3];
|
|
if (regexec(&uri_regex, p_keyloc,
|
|
sizeof (uri_matches) / sizeof (*uri_matches),
|
|
uri_matches, 0) == 0) {
|
|
p_keyloc[uri_matches[2].rm_eo] = '\0';
|
|
const char *path =
|
|
&p_keyloc[uri_matches[2].rm_so];
|
|
|
|
/*
|
|
* Assumes all URI keylocations need
|
|
* the mount for their path;
|
|
* http://, for example, wouldn't
|
|
* (but it'd need network-online.target et al.)
|
|
*/
|
|
keymountdep = path;
|
|
} else {
|
|
if (strcmp(p_keyloc, "prompt") != 0)
|
|
fprintf(stderr, PROGNAME "[%d]: %s: "
|
|
"unknown non-URI keylocation=%s\n",
|
|
getpid(), dataset, p_keyloc);
|
|
|
|
is_prompt = true;
|
|
}
|
|
|
|
|
|
/* Generate the key-load .service unit */
|
|
FILE *keyloadunit_f = fopenat(destdir_fd, keyloadunit,
|
|
O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w",
|
|
0644);
|
|
if (!keyloadunit_f) {
|
|
fprintf(stderr, PROGNAME "[%d]: %s: "
|
|
"couldn't open %s under %s: %s\n",
|
|
getpid(), dataset, keyloadunit, destdir,
|
|
strerror(errno));
|
|
return (1);
|
|
}
|
|
|
|
fprintf(keyloadunit_f,
|
|
OUTPUT_HEADER
|
|
"[Unit]\n"
|
|
"Description=Load ZFS key for %s\n"
|
|
"SourcePath=" FSLIST "/%s\n"
|
|
"Documentation=man:zfs-mount-generator(8)\n"
|
|
"DefaultDependencies=no\n"
|
|
"Wants=%s\n"
|
|
"After=%s\n",
|
|
dataset, cachefile, wants, after);
|
|
|
|
if (p_systemd_requires)
|
|
fprintf(keyloadunit_f,
|
|
"Requires=%s\n", p_systemd_requires);
|
|
|
|
if (p_systemd_requiresmountsfor || keymountdep) {
|
|
fprintf(keyloadunit_f, "RequiresMountsFor=");
|
|
if (p_systemd_requiresmountsfor)
|
|
fprintf(keyloadunit_f,
|
|
"%s ", p_systemd_requiresmountsfor);
|
|
if (keymountdep)
|
|
fprintf(keyloadunit_f,
|
|
"'%s'", keymountdep);
|
|
fprintf(keyloadunit_f, "\n");
|
|
}
|
|
|
|
/* BEGIN CSTYLED */
|
|
fprintf(keyloadunit_f,
|
|
"\n"
|
|
"[Service]\n"
|
|
"Type=oneshot\n"
|
|
"RemainAfterExit=yes\n"
|
|
"# This avoids a dependency loop involving systemd-journald.socket if this\n"
|
|
"# dataset is a parent of the root filesystem.\n"
|
|
"StandardOutput=null\n"
|
|
"StandardError=null\n"
|
|
"ExecStart=/bin/sh -c '"
|
|
"set -eu;"
|
|
"keystatus=\"$$(" ZFS " get -H -o value keystatus \"%s\")\";"
|
|
"[ \"$$keystatus\" = \"unavailable\" ] || exit 0;",
|
|
dataset);
|
|
if (is_prompt)
|
|
fprintf(keyloadunit_f,
|
|
"count=0;"
|
|
"while [ $$count -lt 3 ]; do "
|
|
"systemd-ask-password --id=\"zfs:%s\" \"Enter passphrase for %s:\" |"
|
|
"" ZFS " load-key \"%s\" && exit 0;"
|
|
"count=$$((count + 1));"
|
|
"done;"
|
|
"exit 1",
|
|
dataset, dataset, dataset);
|
|
else
|
|
fprintf(keyloadunit_f,
|
|
"" ZFS " load-key \"%s\"",
|
|
dataset);
|
|
|
|
fprintf(keyloadunit_f,
|
|
"'\n"
|
|
"ExecStop=/bin/sh -c '"
|
|
"set -eu;"
|
|
"keystatus=\"$$(" ZFS " get -H -o value keystatus \"%s\")\";"
|
|
"[ \"$$keystatus\" = \"available\" ] || exit 0;"
|
|
"" ZFS " unload-key \"%s\""
|
|
"'\n",
|
|
dataset, dataset);
|
|
/* END CSTYLED */
|
|
|
|
(void) fclose(keyloadunit_f);
|
|
}
|
|
|
|
/* Update dependencies for the mount file to want this */
|
|
bindsto = keyloadunit;
|
|
if (after[0] == '\0')
|
|
after = keyloadunit;
|
|
else if (asprintf(&toktmp, "%s %s", after, keyloadunit) != -1)
|
|
after = toktmp;
|
|
else
|
|
EXIT_ENOMEM();
|
|
}
|
|
|
|
|
|
/* Skip generation of the mount unit if org.openzfs.systemd:ignore=on */
|
|
if (strcmp(p_systemd_ignore, "-") == 0 ||
|
|
strcmp(p_systemd_ignore, "off") == 0) {
|
|
/* ok */
|
|
} else if (strcmp(p_systemd_ignore, "on") == 0)
|
|
return (0);
|
|
else {
|
|
fprintf(stderr, PROGNAME "[%d]: %s: "
|
|
"invalid org.openzfs.systemd:ignore=%s\n",
|
|
getpid(), dataset, p_systemd_ignore);
|
|
return (1);
|
|
}
|
|
|
|
/* Check for canmount */
|
|
if (strcmp(p_canmount, "on") == 0) {
|
|
/* ok */
|
|
} else if (strcmp(p_canmount, "noauto") == 0)
|
|
noauto = true;
|
|
else if (strcmp(p_canmount, "off") == 0)
|
|
return (0);
|
|
else {
|
|
fprintf(stderr, PROGNAME "[%d]: %s: invalid canmount=%s\n",
|
|
getpid(), dataset, p_canmount);
|
|
return (1);
|
|
}
|
|
|
|
/* Check for legacy and blank mountpoints */
|
|
if (strcmp(p_mountpoint, "legacy") == 0 ||
|
|
strcmp(p_mountpoint, "none") == 0)
|
|
return (0);
|
|
else if (p_mountpoint[0] != '/') {
|
|
fprintf(stderr, PROGNAME "[%d]: %s: invalid mountpoint=%s\n",
|
|
getpid(), dataset, p_mountpoint);
|
|
return (1);
|
|
}
|
|
|
|
/* Escape the mountpoint per systemd policy */
|
|
simplify_path(p_mountpoint);
|
|
const char *mountfile = systemd_escape_path(p_mountpoint, "", ".mount");
|
|
if (mountfile == NULL) {
|
|
fprintf(stderr,
|
|
PROGNAME "[%d]: %s: abnormal simplified mountpoint: %s\n",
|
|
getpid(), dataset, p_mountpoint);
|
|
return (1);
|
|
}
|
|
|
|
|
|
/*
|
|
* Parse options, cf. lib/libzfs/libzfs_mount.c:zfs_add_options
|
|
*
|
|
* The longest string achievable here is
|
|
* ",atime,strictatime,nodev,noexec,rw,nosuid,nomand".
|
|
*/
|
|
char opts[64] = "";
|
|
|
|
/* atime */
|
|
if (strcmp(p_atime, "on") == 0) {
|
|
/* relatime */
|
|
if (strcmp(p_relatime, "on") == 0)
|
|
strcat(opts, ",atime,relatime");
|
|
else if (strcmp(p_relatime, "off") == 0)
|
|
strcat(opts, ",atime,strictatime");
|
|
else
|
|
fprintf(stderr,
|
|
PROGNAME "[%d]: %s: invalid relatime=%s\n",
|
|
getpid(), dataset, p_relatime);
|
|
} else if (strcmp(p_atime, "off") == 0) {
|
|
strcat(opts, ",noatime");
|
|
} else
|
|
fprintf(stderr, PROGNAME "[%d]: %s: invalid atime=%s\n",
|
|
getpid(), dataset, p_atime);
|
|
|
|
/* devices */
|
|
if (strcmp(p_devices, "on") == 0)
|
|
strcat(opts, ",dev");
|
|
else if (strcmp(p_devices, "off") == 0)
|
|
strcat(opts, ",nodev");
|
|
else
|
|
fprintf(stderr, PROGNAME "[%d]: %s: invalid devices=%s\n",
|
|
getpid(), dataset, p_devices);
|
|
|
|
/* exec */
|
|
if (strcmp(p_exec, "on") == 0)
|
|
strcat(opts, ",exec");
|
|
else if (strcmp(p_exec, "off") == 0)
|
|
strcat(opts, ",noexec");
|
|
else
|
|
fprintf(stderr, PROGNAME "[%d]: %s: invalid exec=%s\n",
|
|
getpid(), dataset, p_exec);
|
|
|
|
/* readonly */
|
|
if (strcmp(p_readonly, "on") == 0)
|
|
strcat(opts, ",ro");
|
|
else if (strcmp(p_readonly, "off") == 0)
|
|
strcat(opts, ",rw");
|
|
else
|
|
fprintf(stderr, PROGNAME "[%d]: %s: invalid readonly=%s\n",
|
|
getpid(), dataset, p_readonly);
|
|
|
|
/* setuid */
|
|
if (strcmp(p_setuid, "on") == 0)
|
|
strcat(opts, ",suid");
|
|
else if (strcmp(p_setuid, "off") == 0)
|
|
strcat(opts, ",nosuid");
|
|
else
|
|
fprintf(stderr, PROGNAME "[%d]: %s: invalid setuid=%s\n",
|
|
getpid(), dataset, p_setuid);
|
|
|
|
/* nbmand */
|
|
if (strcmp(p_nbmand, "on") == 0)
|
|
strcat(opts, ",mand");
|
|
else if (strcmp(p_nbmand, "off") == 0)
|
|
strcat(opts, ",nomand");
|
|
else
|
|
fprintf(stderr, PROGNAME "[%d]: %s: invalid nbmand=%s\n",
|
|
getpid(), dataset, p_setuid);
|
|
|
|
if (strcmp(p_systemd_wantedby, "-") != 0) {
|
|
noauto = true;
|
|
|
|
if (strcmp(p_systemd_wantedby, "none") != 0)
|
|
wantedby = p_systemd_wantedby;
|
|
}
|
|
|
|
if (strcmp(p_systemd_requiredby, "-") != 0) {
|
|
noauto = true;
|
|
|
|
if (strcmp(p_systemd_requiredby, "none") != 0)
|
|
requiredby = p_systemd_requiredby;
|
|
}
|
|
|
|
/*
|
|
* For datasets with canmount=on, a dependency is created for
|
|
* local-fs.target by default. To avoid regressions, this dependency
|
|
* is reduced to "wants" rather than "requires" when nofail!=off.
|
|
* **THIS MAY CHANGE**
|
|
* noauto=on disables this behavior completely.
|
|
*/
|
|
if (!noauto) {
|
|
if (strcmp(p_systemd_nofail, "off") == 0)
|
|
requiredby = strdupa("local-fs.target");
|
|
else {
|
|
wantedby = strdupa("local-fs.target");
|
|
wantedby_append = strcmp(p_systemd_nofail, "on") != 0;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Handle existing files:
|
|
* 1. We never overwrite existing files, although we may delete
|
|
* files if we're sure they were created by us. (see 5.)
|
|
* 2. We handle files differently based on canmount.
|
|
* Units with canmount=on always have precedence over noauto.
|
|
* This is enforced by the noauto_not_on_sem semaphore,
|
|
* which is only unlocked when the last canmount=on process exits.
|
|
* It is important to use p_canmount and not noauto here,
|
|
* since we categorise by canmount while other properties,
|
|
* e.g. org.openzfs.systemd:wanted-by, also modify noauto.
|
|
* 3. If no unit file exists for a noauto dataset, we create one.
|
|
* Additionally, we use noauto_files to track the unit file names
|
|
* (which are the systemd-escaped mountpoints) of all (exclusively)
|
|
* noauto datasets that had a file created.
|
|
* 4. If the file to be created is found in the tracking array,
|
|
* we do NOT create it.
|
|
* 5. If a file exists for a noauto dataset,
|
|
* we check whether the file name is in the array.
|
|
* If it is, we have multiple noauto datasets for the same
|
|
* mountpoint. In such cases, we remove the file for safety.
|
|
* We leave the file name in the tracking array to avoid
|
|
* further noauto datasets creating a file for this path again.
|
|
*/
|
|
|
|
{
|
|
sem_t *our_sem = (strcmp(p_canmount, "on") == 0) ?
|
|
&noauto_files->noauto_names_sem :
|
|
&noauto_files->noauto_not_on_sem;
|
|
while (sem_wait(our_sem) == -1 && errno == EINTR)
|
|
;
|
|
}
|
|
|
|
struct stat stbuf;
|
|
bool already_exists = fstatat(destdir_fd, mountfile, &stbuf, 0) == 0;
|
|
|
|
bool is_known = false;
|
|
for (size_t i = 0; i < noauto_files->noauto_names_len; ++i) {
|
|
if (strncmp(
|
|
noauto_files->noauto_names[i], mountfile, NAME_MAX) == 0) {
|
|
is_known = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (already_exists) {
|
|
if (is_known) {
|
|
/* If it's in $noauto_files, we must be noauto too */
|
|
|
|
/* See 5 */
|
|
errno = 0;
|
|
(void) unlinkat(destdir_fd, mountfile, 0);
|
|
|
|
/* See 2 */
|
|
fprintf(stderr, PROGNAME "[%d]: %s: "
|
|
"removing duplicate noauto unit %s%s%s\n",
|
|
getpid(), dataset, mountfile,
|
|
errno ? "" : " failed: ",
|
|
errno ? "" : strerror(errno));
|
|
} else {
|
|
/* Don't log for canmount=noauto */
|
|
if (strcmp(p_canmount, "on") == 0)
|
|
fprintf(stderr, PROGNAME "[%d]: %s: "
|
|
"%s already exists. Skipping.\n",
|
|
getpid(), dataset, mountfile);
|
|
}
|
|
|
|
/* File exists: skip current dataset */
|
|
if (strcmp(p_canmount, "on") == 0)
|
|
sem_post(&noauto_files->noauto_names_sem);
|
|
return (0);
|
|
} else {
|
|
if (is_known) {
|
|
/* See 4 */
|
|
if (strcmp(p_canmount, "on") == 0)
|
|
sem_post(&noauto_files->noauto_names_sem);
|
|
return (0);
|
|
} else if (strcmp(p_canmount, "noauto") == 0) {
|
|
if (noauto_files->noauto_names_len ==
|
|
noauto_files->noauto_names_max)
|
|
fprintf(stderr, PROGNAME "[%d]: %s: "
|
|
"noauto dataset limit (%zu) reached! "
|
|
"Not tracking %s. Please report this to "
|
|
"https://github.com/openzfs/zfs\n",
|
|
getpid(), dataset,
|
|
noauto_files->noauto_names_max, mountfile);
|
|
else {
|
|
strncpy(noauto_files->noauto_names[
|
|
noauto_files->noauto_names_len],
|
|
mountfile, NAME_MAX);
|
|
++noauto_files->noauto_names_len;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
FILE *mountfile_f = fopenat(destdir_fd, mountfile,
|
|
O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w", 0644);
|
|
if (strcmp(p_canmount, "on") == 0)
|
|
sem_post(&noauto_files->noauto_names_sem);
|
|
if (!mountfile_f) {
|
|
fprintf(stderr,
|
|
PROGNAME "[%d]: %s: couldn't open %s under %s: %s\n",
|
|
getpid(), dataset, mountfile, destdir, strerror(errno));
|
|
return (1);
|
|
}
|
|
|
|
fprintf(mountfile_f,
|
|
OUTPUT_HEADER
|
|
"[Unit]\n"
|
|
"SourcePath=" FSLIST "/%s\n"
|
|
"Documentation=man:zfs-mount-generator(8)\n"
|
|
"\n"
|
|
"Before=",
|
|
cachefile);
|
|
|
|
if (p_systemd_before)
|
|
fprintf(mountfile_f, "%s ", p_systemd_before);
|
|
fprintf(mountfile_f, "zfs-mount.service"); /* Ensures we don't race */
|
|
if (requiredby)
|
|
fprintf(mountfile_f, " %s", requiredby);
|
|
if (wantedby && wantedby_append)
|
|
fprintf(mountfile_f, " %s", wantedby);
|
|
|
|
fprintf(mountfile_f,
|
|
"\n"
|
|
"After=");
|
|
if (p_systemd_after)
|
|
fprintf(mountfile_f, "%s ", p_systemd_after);
|
|
fprintf(mountfile_f, "%s\n", after);
|
|
|
|
fprintf(mountfile_f, "Wants=%s\n", wants);
|
|
|
|
if (bindsto)
|
|
fprintf(mountfile_f, "BindsTo=%s\n", bindsto);
|
|
if (p_systemd_requires)
|
|
fprintf(mountfile_f, "Requires=%s\n", p_systemd_requires);
|
|
if (p_systemd_requiresmountsfor)
|
|
fprintf(mountfile_f,
|
|
"RequiresMountsFor=%s\n", p_systemd_requiresmountsfor);
|
|
|
|
fprintf(mountfile_f,
|
|
"\n"
|
|
"[Mount]\n"
|
|
"Where=%s\n"
|
|
"What=%s\n"
|
|
"Type=zfs\n"
|
|
"Options=defaults%s,zfsutil\n",
|
|
p_mountpoint, dataset, opts);
|
|
|
|
(void) fclose(mountfile_f);
|
|
|
|
if (!requiredby && !wantedby)
|
|
return (0);
|
|
|
|
/* Finally, create the appropriate dependencies */
|
|
char *linktgt;
|
|
if (asprintf(&linktgt, "../%s", mountfile) == -1)
|
|
EXIT_ENOMEM();
|
|
|
|
char *dependencies[][2] = {
|
|
{"wants", wantedby},
|
|
{"requires", requiredby},
|
|
{}
|
|
};
|
|
for (__typeof__(&*dependencies) dep = &*dependencies; **dep; ++dep) {
|
|
if (!(*dep)[1])
|
|
continue;
|
|
|
|
for (char *reqby = strtok_r((*dep)[1], " ", &toktmp);
|
|
reqby;
|
|
reqby = strtok_r(NULL, " ", &toktmp)) {
|
|
char *depdir;
|
|
if (asprintf(&depdir, "%s.%s", reqby, (*dep)[0]) == -1)
|
|
EXIT_ENOMEM();
|
|
|
|
(void) mkdirat(destdir_fd, depdir, 0755);
|
|
int depdir_fd = openat(destdir_fd, depdir,
|
|
O_PATH | O_DIRECTORY | O_CLOEXEC);
|
|
if (depdir_fd < 0) {
|
|
fprintf(stderr, PROGNAME "[%d]: %s: "
|
|
"couldn't open %s under %s: %s\n",
|
|
getpid(), dataset, depdir, destdir,
|
|
strerror(errno));
|
|
free(depdir);
|
|
continue;
|
|
}
|
|
|
|
if (symlinkat(linktgt, depdir_fd, mountfile) == -1)
|
|
fprintf(stderr, PROGNAME "[%d]: %s: "
|
|
"couldn't symlink at "
|
|
"%s under %s under %s: %s\n",
|
|
getpid(), dataset, mountfile,
|
|
depdir, destdir, strerror(errno));
|
|
|
|
(void) close(depdir_fd);
|
|
free(depdir);
|
|
}
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
|
|
static int
|
|
pool_enumerator(zpool_handle_t *pool, void *data __attribute__((unused)))
|
|
{
|
|
int ret = 0;
|
|
|
|
/*
|
|
* Pools are guaranteed-unique by the kernel,
|
|
* no risk of leaking dupes here
|
|
*/
|
|
char *name = strdup(zpool_get_name(pool));
|
|
if (!name || !tsearch(name, &known_pools, STRCMP)) {
|
|
free(name);
|
|
ret = ENOMEM;
|
|
}
|
|
|
|
zpool_close(pool);
|
|
return (ret);
|
|
}
|
|
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
struct timespec time_init = {};
|
|
clock_gettime(CLOCK_MONOTONIC_RAW, &time_init);
|
|
|
|
{
|
|
int kmfd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC);
|
|
if (kmfd >= 0) {
|
|
(void) dup2(kmfd, STDERR_FILENO);
|
|
(void) close(kmfd);
|
|
}
|
|
}
|
|
|
|
uint8_t debug = 0;
|
|
|
|
argv0 = argv[0];
|
|
switch (argc) {
|
|
case 1:
|
|
/* Use default */
|
|
break;
|
|
case 2:
|
|
case 4:
|
|
destdir = argv[1];
|
|
break;
|
|
default:
|
|
fprintf(stderr,
|
|
PROGNAME "[%d]: wrong argument count: %d\n",
|
|
getpid(), argc - 1);
|
|
_exit(1);
|
|
}
|
|
|
|
{
|
|
destdir_fd = open(destdir, O_PATH | O_DIRECTORY | O_CLOEXEC);
|
|
if (destdir_fd < 0) {
|
|
fprintf(stderr, PROGNAME "[%d]: "
|
|
"can't open destination directory %s: %s\n",
|
|
getpid(), destdir, strerror(errno));
|
|
_exit(1);
|
|
}
|
|
}
|
|
|
|
DIR *fslist_dir = opendir(FSLIST);
|
|
if (!fslist_dir) {
|
|
if (errno != ENOENT)
|
|
fprintf(stderr,
|
|
PROGNAME "[%d]: couldn't open " FSLIST ": %s\n",
|
|
getpid(), strerror(errno));
|
|
_exit(0);
|
|
}
|
|
|
|
{
|
|
libzfs_handle_t *libzfs = libzfs_init();
|
|
if (libzfs) {
|
|
if (zpool_iter(libzfs, pool_enumerator, NULL) != 0)
|
|
fprintf(stderr, PROGNAME "[%d]: "
|
|
"error listing pools, ignoring\n",
|
|
getpid());
|
|
libzfs_fini(libzfs);
|
|
} else
|
|
fprintf(stderr, PROGNAME "[%d]: "
|
|
"couldn't start libzfs, ignoring\n",
|
|
getpid());
|
|
}
|
|
|
|
{
|
|
int regerr = regcomp(&uri_regex, URI_REGEX_S, 0);
|
|
if (regerr != 0) {
|
|
fprintf(stderr,
|
|
PROGNAME "[%d]: invalid regex: %d\n",
|
|
getpid(), regerr);
|
|
_exit(1);
|
|
}
|
|
}
|
|
|
|
{
|
|
/*
|
|
* We could just get a gigabyte here and Not Care,
|
|
* but if vm.overcommit_memory=2, then MAP_NORESERVE is ignored
|
|
* and we'd try (and likely fail) to rip it out of swap
|
|
*/
|
|
noauto_files = mmap(NULL, 4 * 1024 * 1024,
|
|
PROT_READ | PROT_WRITE,
|
|
MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
|
|
if (noauto_files == MAP_FAILED) {
|
|
fprintf(stderr,
|
|
PROGNAME "[%d]: couldn't allocate IPC region: %s\n",
|
|
getpid(), strerror(errno));
|
|
_exit(1);
|
|
}
|
|
|
|
sem_init(&noauto_files->noauto_not_on_sem, true, 0);
|
|
sem_init(&noauto_files->noauto_names_sem, true, 1);
|
|
noauto_files->noauto_names_len = 0;
|
|
/* Works out to 16447ish, *well* enough */
|
|
noauto_files->noauto_names_max =
|
|
(4 * 1024 * 1024 - sizeof (*noauto_files)) / NAME_MAX;
|
|
}
|
|
|
|
char *line = NULL;
|
|
size_t linelen = 0;
|
|
struct timespec time_start = {};
|
|
{
|
|
const char *dbgenv = getenv("ZFS_DEBUG");
|
|
if (dbgenv)
|
|
debug = atoi(dbgenv);
|
|
else {
|
|
FILE *cmdline = fopen("/proc/cmdline", "re");
|
|
if (cmdline != NULL) {
|
|
if (getline(&line, &linelen, cmdline) >= 0)
|
|
debug = strstr(line, "debug") ? 2 : 0;
|
|
(void) fclose(cmdline);
|
|
}
|
|
}
|
|
|
|
if (debug && !isatty(STDOUT_FILENO))
|
|
dup2(STDERR_FILENO, STDOUT_FILENO);
|
|
}
|
|
|
|
size_t forked_canmount_on = 0;
|
|
size_t forked_canmount_not_on = 0;
|
|
size_t canmount_on_pids_len = 128;
|
|
pid_t *canmount_on_pids =
|
|
malloc(canmount_on_pids_len * sizeof (*canmount_on_pids));
|
|
if (canmount_on_pids == NULL)
|
|
canmount_on_pids_len = 0;
|
|
|
|
if (debug)
|
|
clock_gettime(CLOCK_MONOTONIC_RAW, &time_start);
|
|
|
|
ssize_t read;
|
|
pid_t pid;
|
|
struct dirent *cachent;
|
|
while ((cachent = readdir(fslist_dir)) != NULL) {
|
|
if (strcmp(cachent->d_name, ".") == 0 ||
|
|
strcmp(cachent->d_name, "..") == 0)
|
|
continue;
|
|
|
|
FILE *cachefile = fopenat(dirfd(fslist_dir), cachent->d_name,
|
|
O_RDONLY | O_CLOEXEC, "r", 0);
|
|
if (!cachefile) {
|
|
fprintf(stderr, PROGNAME "[%d]: "
|
|
"couldn't open %s under " FSLIST ": %s\n",
|
|
getpid(), cachent->d_name, strerror(errno));
|
|
continue;
|
|
}
|
|
|
|
while ((read = getline(&line, &linelen, cachefile)) >= 0) {
|
|
line[read - 1] = '\0'; /* newline */
|
|
|
|
switch (pid = fork()) {
|
|
case -1:
|
|
fprintf(stderr,
|
|
PROGNAME "[%d]: couldn't fork for %s: %s\n",
|
|
getpid(), line, strerror(errno));
|
|
break;
|
|
case 0: /* child */
|
|
_exit(line_worker(line, cachent->d_name));
|
|
default: { /* parent */
|
|
char *tmp;
|
|
char *dset = strtok_r(line, "\t", &tmp);
|
|
strtok_r(NULL, "\t", &tmp);
|
|
char *canmount = strtok_r(NULL, "\t", &tmp);
|
|
bool canmount_on =
|
|
canmount && strncmp(canmount, "on", 2) == 0;
|
|
|
|
if (debug >= 2)
|
|
printf(PROGNAME ": forked %d, "
|
|
"canmount_on=%d, dataset=%s\n",
|
|
(int)pid, canmount_on, dset);
|
|
|
|
if (canmount_on &&
|
|
forked_canmount_on ==
|
|
canmount_on_pids_len) {
|
|
size_t new_len =
|
|
(canmount_on_pids_len ?: 16) * 2;
|
|
void *new_pidlist =
|
|
realloc(canmount_on_pids,
|
|
new_len *
|
|
sizeof (*canmount_on_pids));
|
|
if (!new_pidlist) {
|
|
fprintf(stderr,
|
|
PROGNAME "[%d]: "
|
|
"out of memory! "
|
|
"Mount ordering may be "
|
|
"affected.\n", getpid());
|
|
continue;
|
|
}
|
|
|
|
canmount_on_pids = new_pidlist;
|
|
canmount_on_pids_len = new_len;
|
|
}
|
|
|
|
if (canmount_on) {
|
|
canmount_on_pids[forked_canmount_on] =
|
|
pid;
|
|
++forked_canmount_on;
|
|
} else
|
|
++forked_canmount_not_on;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
(void) fclose(cachefile);
|
|
}
|
|
free(line);
|
|
|
|
if (forked_canmount_on == 0) {
|
|
/* No canmount=on processes to finish, so don't deadlock here */
|
|
for (size_t i = 0; i < forked_canmount_not_on; ++i)
|
|
sem_post(&noauto_files->noauto_not_on_sem);
|
|
} else {
|
|
/* Likely a no-op, since we got these from a narrow fork loop */
|
|
qsort(canmount_on_pids, forked_canmount_on,
|
|
sizeof (*canmount_on_pids), PID_T_CMP);
|
|
}
|
|
|
|
int status, ret = 0;
|
|
struct rusage usage;
|
|
size_t forked_canmount_on_max = forked_canmount_on;
|
|
while ((pid = wait4(-1, &status, 0, &usage)) != -1) {
|
|
ret |= WEXITSTATUS(status) | WTERMSIG(status);
|
|
|
|
if (forked_canmount_on != 0) {
|
|
if (bsearch(&pid, canmount_on_pids,
|
|
forked_canmount_on_max, sizeof (*canmount_on_pids),
|
|
PID_T_CMP))
|
|
--forked_canmount_on;
|
|
|
|
if (forked_canmount_on == 0) {
|
|
/*
|
|
* All canmount=on processes have finished,
|
|
* let all the lower-priority ones finish now
|
|
*/
|
|
for (size_t i = 0;
|
|
i < forked_canmount_not_on; ++i)
|
|
sem_post(
|
|
&noauto_files->noauto_not_on_sem);
|
|
}
|
|
}
|
|
|
|
if (debug >= 2)
|
|
printf(PROGNAME ": %d done, user=%llu.%06us, "
|
|
"system=%llu.%06us, maxrss=%ldB, ex=0x%x\n",
|
|
(int)pid,
|
|
(unsigned long long) usage.ru_utime.tv_sec,
|
|
(unsigned int) usage.ru_utime.tv_usec,
|
|
(unsigned long long) usage.ru_stime.tv_sec,
|
|
(unsigned int) usage.ru_stime.tv_usec,
|
|
usage.ru_maxrss * 1024, status);
|
|
}
|
|
|
|
if (debug) {
|
|
struct timespec time_end = {};
|
|
clock_gettime(CLOCK_MONOTONIC_RAW, &time_end);
|
|
|
|
getrusage(RUSAGE_SELF, &usage);
|
|
printf(
|
|
"\n"
|
|
PROGNAME ": self : "
|
|
"user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n",
|
|
(unsigned long long) usage.ru_utime.tv_sec,
|
|
(unsigned int) usage.ru_utime.tv_usec,
|
|
(unsigned long long) usage.ru_stime.tv_sec,
|
|
(unsigned int) usage.ru_stime.tv_usec,
|
|
usage.ru_maxrss * 1024);
|
|
|
|
getrusage(RUSAGE_CHILDREN, &usage);
|
|
printf(PROGNAME ": children: "
|
|
"user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n",
|
|
(unsigned long long) usage.ru_utime.tv_sec,
|
|
(unsigned int) usage.ru_utime.tv_usec,
|
|
(unsigned long long) usage.ru_stime.tv_sec,
|
|
(unsigned int) usage.ru_stime.tv_usec,
|
|
usage.ru_maxrss * 1024);
|
|
|
|
if (time_start.tv_nsec > time_end.tv_nsec) {
|
|
time_end.tv_nsec =
|
|
1000000000 + time_end.tv_nsec - time_start.tv_nsec;
|
|
time_end.tv_sec -= 1;
|
|
} else
|
|
time_end.tv_nsec -= time_start.tv_nsec;
|
|
time_end.tv_sec -= time_start.tv_sec;
|
|
|
|
if (time_init.tv_nsec > time_start.tv_nsec) {
|
|
time_start.tv_nsec =
|
|
1000000000 + time_start.tv_nsec - time_init.tv_nsec;
|
|
time_start.tv_sec -= 1;
|
|
} else
|
|
time_start.tv_nsec -= time_init.tv_nsec;
|
|
time_start.tv_sec -= time_init.tv_sec;
|
|
|
|
time_init.tv_nsec = time_start.tv_nsec + time_end.tv_nsec;
|
|
time_init.tv_sec =
|
|
time_start.tv_sec + time_end.tv_sec +
|
|
time_init.tv_nsec / 1000000000;
|
|
time_init.tv_nsec %= 1000000000;
|
|
|
|
printf(PROGNAME ": wall : "
|
|
"total=%llu.%09llus = "
|
|
"init=%llu.%09llus + real=%llu.%09llus\n",
|
|
(unsigned long long) time_init.tv_sec,
|
|
(unsigned long long) time_init.tv_nsec,
|
|
(unsigned long long) time_start.tv_sec,
|
|
(unsigned long long) time_start.tv_nsec,
|
|
(unsigned long long) time_end.tv_sec,
|
|
(unsigned long long) time_end.tv_nsec);
|
|
}
|
|
|
|
_exit(ret);
|
|
}
|