mirror_zfs/tests/zfs-tests/cmd/idmap_util.c
youzhongyang 2a068a1394
Support idmapped mount
Adds support for idmapped mounts.  Supported as of Linux 5.12 this 
functionality allows user and group IDs to be remapped without changing 
their state on disk.  This can be useful for portable home directories
and a variety of container related use cases.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Youzhong Yang <yyang@mathworks.com>
Closes #12923
Closes #13671
2022-10-19 11:17:09 -07:00

792 lines
16 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <linux/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <fcntl.h>
#include <errno.h>
#include <sched.h>
#include <syscall.h>
#include <sys/list.h>
#ifndef UINT_MAX
#define UINT_MAX 4294967295U
#endif
#ifndef __NR_Linux
#if defined __alpha__
#define __NR_Linux 110
#elif defined _MIPS_SIM
#if _MIPS_SIM == _MIPS_SIM_ABI32
#define __NR_Linux 4000
#endif
#if _MIPS_SIM == _MIPS_SIM_NABI32
#define __NR_Linux 6000
#endif
#if _MIPS_SIM == _MIPS_SIM_ABI64
#define __NR_Linux 5000
#endif
#elif defined __ia64__
#define __NR_Linux 1024
#else
#define __NR_Linux 0
#endif
#endif
#ifndef __NR_mount_setattr
#define __NR_mount_setattr (442 + __NR_Linux)
#endif
#ifndef __NR_open_tree
#define __NR_open_tree (428 + __NR_Linux)
#endif
#ifndef __NR_move_mount
#define __NR_move_mount (429 + __NR_Linux)
#endif
#ifndef MNT_DETACH
#define MNT_DETACH 2
#endif
#ifndef MOVE_MOUNT_F_EMPTY_PATH
#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004
#endif
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
#ifndef OPEN_TREE_CLONE
#define OPEN_TREE_CLONE 1
#endif
#ifndef OPEN_TREE_CLOEXEC
#define OPEN_TREE_CLOEXEC O_CLOEXEC
#endif
#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000
#endif
#ifndef mount_attr
struct mount_attr {
__u64 attr_set;
__u64 attr_clr;
__u64 propagation;
__u64 userns_fd;
};
#endif
static inline int
sys_mount_setattr(int dfd, const char *path, unsigned int flags,
struct mount_attr *attr, size_t size)
{
return (syscall(__NR_mount_setattr, dfd, path, flags, attr, size));
}
static inline int
sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
return (syscall(__NR_open_tree, dfd, filename, flags));
}
static inline int sys_move_mount(int from_dfd, const char *from_pathname,
int to_dfd, const char *to_pathname, unsigned int flags)
{
return (syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
to_pathname, flags));
}
typedef enum idmap_type_t {
TYPE_UID,
TYPE_GID,
TYPE_BOTH
} idmap_type_t;
struct idmap_entry {
__u32 first;
__u32 lower_first;
__u32 count;
idmap_type_t type;
list_node_t node;
};
static void
log_msg(const char *msg, ...)
{
va_list ap;
va_start(ap, msg);
vfprintf(stderr, msg, ap);
fputc('\n', stderr);
va_end(ap);
}
#define log_errno(msg, args...) \
do { \
log_msg("%s:%d:%s: [%m] " msg, __FILE__, __LINE__,\
__FUNCTION__, ##args); \
} while (0)
/*
* Parse the idmapping in the following format
* and add to the list:
*
* u:nsid_first:hostid_first:count
* g:nsid_first:hostid_first:count
* b:nsid_first:hostid_first:count
*
* The delimiter can be : or space character.
*
* Return:
* 0 if success
* ENOMEM if out of memory
* EINVAL if wrong arg or input
*/
static int
parse_idmap_entry(list_t *head, char *input)
{
char *token, *savedptr = NULL;
struct idmap_entry *entry;
unsigned long ul;
char *delimiter = (char *)": ";
char c;
if (!input || !head)
return (EINVAL);
entry = malloc(sizeof (*entry));
if (!entry)
return (ENOMEM);
token = strtok_r(input, delimiter, &savedptr);
if (token)
c = token[0];
if (!token || (c != 'b' && c != 'u' && c != 'g'))
goto errout;
entry->type = (c == 'b') ? TYPE_BOTH :
((c == 'u') ? TYPE_UID : TYPE_GID);
token = strtok_r(NULL, delimiter, &savedptr);
if (!token)
goto errout;
ul = strtoul(token, NULL, 10);
if (ul > UINT_MAX || errno != 0)
goto errout;
entry->first = (__u32)ul;
token = strtok_r(NULL, delimiter, &savedptr);
if (!token)
goto errout;
ul = strtoul(token, NULL, 10);
if (ul > UINT_MAX || errno != 0)
goto errout;
entry->lower_first = (__u32)ul;
token = strtok_r(NULL, delimiter, &savedptr);
if (!token)
goto errout;
ul = strtoul(token, NULL, 10);
if (ul > UINT_MAX || errno != 0)
goto errout;
entry->count = (__u32)ul;
list_insert_tail(head, entry);
return (0);
errout:
free(entry);
return (EINVAL);
}
/*
* Release all the entries in the list
*/
static void
free_idmap(list_t *head)
{
struct idmap_entry *entry;
while ((entry = list_remove_head(head)) != NULL)
free(entry);
/* list_destroy() to be done by the caller */
}
/*
* Write all bytes in the buffer to fd
*/
static ssize_t
write_buf(int fd, const char *buf, size_t buf_size)
{
ssize_t written, total_written = 0;
size_t remaining = buf_size;
char *position = (char *)buf;
for (;;) {
written = write(fd, position, remaining);
if (written < 0 && errno == EINTR)
continue;
if (written < 0) {
log_errno("write");
return (written);
}
total_written += written;
if (total_written == buf_size)
break;
remaining -= written;
position += written;
}
return (total_written);
}
/*
* Read data from file into buffer
*/
static ssize_t
read_buf(int fd, char *buf, size_t buf_size)
{
int ret;
for (;;) {
ret = read(fd, buf, buf_size);
if (ret < 0 && errno == EINTR)
continue;
break;
}
if (ret < 0)
log_errno("read");
return (ret);
}
/*
* Write idmap of the given type in the buffer to the
* process' uid_map or gid_map proc file.
*
* Return:
* 0 if success
* errno if there's any error
*/
static int
write_idmap(pid_t pid, char *buf, size_t buf_size, idmap_type_t type)
{
char path[PATH_MAX];
int fd = -EBADF;
int ret;
(void) snprintf(path, sizeof (path), "/proc/%d/%cid_map",
pid, type == TYPE_UID ? 'u' : 'g');
fd = open(path, O_WRONLY | O_CLOEXEC);
if (fd < 0) {
ret = errno;
log_errno("open(%s)", path);
goto out;
}
ret = write_buf(fd, buf, buf_size);
if (ret < 0)
ret = errno;
else
ret = 0;
out:
if (fd > 0)
close(fd);
return (ret);
}
/*
* Write idmap info in the list to the process
* user namespace, i.e. its /proc/<pid>/uid_map
* and /proc/<pid>/gid_map file.
*
* Return:
* 0 if success
* errno if it fails
*/
static int
write_pid_idmaps(pid_t pid, list_t *head)
{
char *buf_uids, *buf_gids;
char *curr_bufu, *curr_bufg;
/* max 4k to be allowed for each map */
int size_buf_uids = 4096, size_buf_gids = 4096;
struct idmap_entry *entry;
int uid_filled, gid_filled;
int ret;
int has_uids = 0, has_gids = 0;
size_t buf_size;
buf_uids = malloc(size_buf_uids);
if (!buf_uids)
return (ENOMEM);
buf_gids = malloc(size_buf_gids);
if (!buf_gids) {
free(buf_uids);
return (ENOMEM);
}
curr_bufu = buf_uids;
curr_bufg = buf_gids;
for (entry = list_head(head); entry; entry = list_next(head, entry)) {
if (entry->type == TYPE_UID || entry->type == TYPE_BOTH) {
uid_filled = snprintf(curr_bufu, size_buf_uids,
"%u %u %u\n", entry->first, entry->lower_first,
entry->count);
if (uid_filled <= 0 || uid_filled >= size_buf_uids) {
ret = E2BIG;
goto out;
}
curr_bufu += uid_filled;
size_buf_uids -= uid_filled;
has_uids = 1;
}
if (entry->type == TYPE_GID || entry->type == TYPE_BOTH) {
gid_filled = snprintf(curr_bufg, size_buf_gids,
"%u %u %u\n", entry->first, entry->lower_first,
entry->count);
if (gid_filled <= 0 || gid_filled >= size_buf_gids) {
ret = E2BIG;
goto out;
}
curr_bufg += gid_filled;
size_buf_gids -= gid_filled;
has_gids = 1;
}
}
if (has_uids) {
buf_size = curr_bufu - buf_uids;
ret = write_idmap(pid, buf_uids, buf_size, TYPE_UID);
if (ret)
goto out;
}
if (has_gids) {
buf_size = curr_bufg - buf_gids;
ret = write_idmap(pid, buf_gids, buf_size, TYPE_GID);
}
out:
free(buf_uids);
free(buf_gids);
return (ret);
}
/*
* Wait for the child process to exit
* and reap it.
*
* Return:
* process exit code if available
*/
static int
wait_for_pid(pid_t pid)
{
int status;
int ret;
for (;;) {
ret = waitpid(pid, &status, 0);
if (ret < 0) {
if (errno == EINTR)
continue;
return (EXIT_FAILURE);
}
break;
}
if (!WIFEXITED(status))
return (EXIT_FAILURE);
return (WEXITSTATUS(status));
}
/*
* Get the file descriptor of the process user namespace
* given its pid.
*
* Return:
* fd if success
* -1 if it fails
*/
static int
userns_fd_from_pid(pid_t pid)
{
int fd;
char path[PATH_MAX];
(void) snprintf(path, sizeof (path), "/proc/%d/ns/user", pid);
fd = open(path, O_RDONLY | O_CLOEXEC);
if (fd < 0)
log_errno("open(%s)", path);
return (fd);
}
/*
* Get the user namespace file descriptor given a list
* of idmap info.
*
* Return:
* fd if success
* -errno if it fails
*/
static int
userns_fd_from_idmap(list_t *head)
{
pid_t pid;
int ret, fd;
int pipe_fd[2];
char c;
int saved_errno = 0;
/* pipe for bidirectional communication */
ret = pipe(pipe_fd);
if (ret) {
log_errno("pipe");
return (-errno);
}
pid = fork();
if (pid < 0) {
log_errno("fork");
return (-errno);
}
if (pid == 0) {
/* child process */
close(pipe_fd[0]);
ret = unshare(CLONE_NEWUSER);
if (ret == 0) {
/* notify the parent of success */
ret = write_buf(pipe_fd[1], "1", 1);
} else {
saved_errno = errno;
log_errno("unshare");
ret = write_buf(pipe_fd[1], "0", 1);
}
if (ret < 0)
saved_errno = errno;
close(pipe_fd[1]);
exit(saved_errno);
}
/* parent process */
close(pipe_fd[1]);
ret = read_buf(pipe_fd[0], &c, 1);
if (ret == 1 && c == '1') {
ret = write_pid_idmaps(pid, head);
if (!ret) {
fd = userns_fd_from_pid(pid);
if (fd < 0)
fd = -errno;
} else {
fd = -ret;
}
} else {
fd = -EBADF;
}
close(pipe_fd[0]);
(void) wait_for_pid(pid);
return (fd);
}
/*
* Check if the operating system supports idmapped mount on the
* given path or not.
*
* Return:
* true if supported
* false if not supported
*/
static bool
is_idmap_supported(char *path)
{
list_t head;
int ret;
int tree_fd = -EBADF, path_fd = -EBADF;
struct mount_attr attr = {
.attr_set = MOUNT_ATTR_IDMAP,
.userns_fd = -EBADF,
};
/* strtok_r() won't be happy with a const string */
char *input = strdup("b:0:1000000:1000000");
if (!input) {
errno = ENOMEM;
log_errno("strdup");
return (false);
}
list_create(&head, sizeof (struct idmap_entry),
offsetof(struct idmap_entry, node));
ret = parse_idmap_entry(&head, input);
if (ret) {
errno = ret;
log_errno("parse_idmap_entry(%s)", input);
goto out;
}
ret = userns_fd_from_idmap(&head);
if (ret < 0)
goto out;
attr.userns_fd = ret;
ret = openat(-EBADF, path, O_DIRECTORY | O_CLOEXEC);
if (ret < 0) {
log_errno("openat(%s)", path);
goto out;
}
path_fd = ret;
ret = sys_open_tree(path_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT |
AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
if (ret < 0) {
log_errno("sys_open_tree");
goto out;
}
tree_fd = ret;
ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH, &attr,
sizeof (attr));
if (ret < 0) {
log_errno("sys_mount_setattr");
}
out:
free_idmap(&head);
list_destroy(&head);
if (tree_fd > 0)
close(tree_fd);
if (path_fd > 0)
close(path_fd);
if (attr.userns_fd > 0)
close(attr.userns_fd);
free(input);
return (ret == 0);
}
/*
* Check if the given path is a mount point or not.
*
* Return:
* true if it is
* false otherwise
*/
static bool
is_mountpoint(char *path)
{
char *parent;
struct stat st_me, st_parent;
bool ret;
parent = malloc(strlen(path)+4);
if (!parent) {
errno = ENOMEM;
log_errno("malloc");
return (false);
}
strcat(strcpy(parent, path), "/..");
if (lstat(path, &st_me) != 0 ||
lstat(parent, &st_parent) != 0)
ret = false;
else
if (st_me.st_dev != st_parent.st_dev ||
st_me.st_ino == st_parent.st_ino)
ret = true;
else
ret = false;
free(parent);
return (ret);
}
/*
* Remount the source on the new target folder with the given
* list of idmap info. If target is NULL, the source will be
* unmounted and then remounted if it is a mountpoint, otherwise
* no unmount is done, the source is simply idmap remounted.
*
* Return:
* 0 if success
* -errno otherwise
*/
static int
do_idmap_mount(list_t *idmap, char *source, char *target, int flags)
{
int ret;
int tree_fd = -EBADF, source_fd = -EBADF;
struct mount_attr attr = {
.attr_set = MOUNT_ATTR_IDMAP,
.userns_fd = -EBADF,
};
ret = userns_fd_from_idmap(idmap);
if (ret < 0)
goto out;
attr.userns_fd = ret;
ret = openat(-EBADF, source, O_DIRECTORY | O_CLOEXEC);
if (ret < 0) {
ret = -errno;
log_errno("openat(%s)", source);
goto out;
}
source_fd = ret;
ret = sys_open_tree(source_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT |
AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE | flags);
if (ret < 0) {
ret = -errno;
log_errno("sys_open_tree");
goto out;
}
tree_fd = ret;
ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH | flags, &attr,
sizeof (attr));
if (ret < 0) {
ret = -errno;
log_errno("sys_mount_setattr");
goto out;
}
if (source != NULL && target == NULL && is_mountpoint(source)) {
ret = umount2(source, MNT_DETACH);
if (ret < 0) {
ret = -errno;
log_errno("umount2(%s)", source);
goto out;
}
}
ret = sys_move_mount(tree_fd, "", -EBADF, target == NULL ?
source : target, MOVE_MOUNT_F_EMPTY_PATH);
if (ret < 0) {
ret = -errno;
log_errno("sys_move_mount(%s)", target == NULL ?
source : target);
}
out:
if (tree_fd > 0)
close(tree_fd);
if (source_fd > 0)
close(source_fd);
if (attr.userns_fd > 0)
close(attr.userns_fd);
return (ret);
}
static void
print_usage(char *argv[])
{
fprintf(stderr, "Usage: %s [-r] [-c] [-m <idmap1>] [-m <idmap2>]" \
" ... [<source>] [<target>]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, " -r Recursively do idmapped mount.\n");
fprintf(stderr, "\n");
fprintf(stderr, " -c Checks if idmapped mount is supported " \
"on the <source> by the operating system or not.\n");
fprintf(stderr, "\n");
fprintf(stderr, " -m <idmap> to specify the idmap info, " \
"in the following format:\n");
fprintf(stderr, " <id_type>:<nsid_first>:<hostid_first>:<count>\n");
fprintf(stderr, "\n");
fprintf(stderr, " <id_type> can be either of 'b', 'u', and 'g'.\n");
fprintf(stderr, "\n");
fprintf(stderr, "The <source> folder will be mounted at <target> " \
"with the provided idmap information.\nIf no <target> is " \
"specified, and <source> is a mount point, " \
"then <source> will be unmounted and then remounted.\n");
}
int
main(int argc, char *argv[])
{
int opt;
list_t idmap_head;
int check_supported = 0;
int ret = EXIT_SUCCESS;
char *source = NULL, *target = NULL;
int flags = 0;
list_create(&idmap_head, sizeof (struct idmap_entry),
offsetof(struct idmap_entry, node));
while ((opt = getopt(argc, argv, "rcm:")) != -1) {
switch (opt) {
case 'r':
flags |= AT_RECURSIVE;
break;
case 'c':
check_supported = 1;
break;
case 'm':
ret = parse_idmap_entry(&idmap_head, optarg);
if (ret) {
errno = ret;
log_errno("parse_idmap_entry(%s)", optarg);
ret = EXIT_FAILURE;
goto out;
}
break;
default:
print_usage(argv);
exit(EXIT_FAILURE);
}
}
if (check_supported == 0 && list_is_empty(&idmap_head)) {
print_usage(argv);
ret = EXIT_FAILURE;
goto out;
}
if (optind >= argc) {
fprintf(stderr, "Expected to have <source>, <target>.\n");
print_usage(argv);
ret = EXIT_FAILURE;
goto out;
}
source = argv[optind];
if (optind < (argc - 1)) {
target = argv[optind + 1];
}
if (check_supported) {
free_idmap(&idmap_head);
list_destroy(&idmap_head);
if (is_idmap_supported(source)) {
printf("idmapped mount is supported on [%s].\n",
source);
return (EXIT_SUCCESS);
} else {
printf("idmapped mount is NOT supported.\n");
return (EXIT_FAILURE);
}
}
ret = do_idmap_mount(&idmap_head, source, target, flags);
if (ret)
ret = EXIT_FAILURE;
out:
free_idmap(&idmap_head);
list_destroy(&idmap_head);
exit(ret);
}