Fix inconsistent mount options for ZFS root

While mounting the ZFS root during boot from the initrd on Linux
distributions, the busybox mount is effectively used, which executes the
mount system call directly. This skips the ZFS helper mount.zfs, which
checks and enables the mount options specified in the dataset
properties. As a result, datasets mounted during boot from the initrd do
not have the correct mount options as specified in the ZFS dataset
properties.

There was an earlier attempt to use mount.zfs in the zfs initrd script,
which is responsible for mounting the ZFS root filesystem (PR#13305).
This was later reverted (PR#14908) after discovering that using
mount.zfs breaks mounting of snapshots on root (/), and that other child
datasets of root have the same issue (Issue#9461).

This happens because switching from busybox mount to mount.zfs correctly
parses the mount options but also adds 'mntpoint=/root' to the mount
options, which is then prepended to the snapshot mountpoint in
'.zfs/snapshot'. '/root' is the directory on Debian with initramfs-tools
where root filesystem is mounted before pivot_root. When Linux runtime
is reached, trying to access the snapshots on root results in
automounting the snapshot on '/root/.zfs/*', which fails.

This commit attempts to fix the automounting of snapshots on root while
using mount.zfs in the initrd script. Since the mountpoint of a dataset
is stored in the vfs_mntpoint field, we can check whether the current
mountpoint of the dataset and vfs_mntpoint are the same. If they are not
the same, reset the vfs_mntpoint field to the current mountpoint. This
corrects the mountpoints of the root dataset and its children in their
respective vfs_mntpoint fields when we try to access the snapshots of
the root dataset or its children. With the correct mountpoint for the
root dataset and its children stored in vfs_mntpoint, all snapshots of
the root dataset are mounted correctly and become accessible.

This fix comes into play only if the current process that is trying to
access the snapshots is not in a chroot context. The Linux kernel API
used to convert a struct path into char format (d_path) returns the
complete path for a given struct path. It works in a chroot environment
as well and returns the correct path from the original filesystem root.

However, d_path fails to return the complete path if any directory from
the original root filesystem is mounted using the --bind or --rbind flag
in the chroot environment. In this case, if we try to access the
snapshot from outside the chroot environment, d_path returns the path
correctly, i.e. it returns the correct path to the directory that is
mounted with the --bind flag. However, inside the chroot environment, it
only returns the path inside the chroot.

For now, to my understanding, there is no better way to obtain the
complete path in char format that also handles the case where
directories from the root filesystem are mounted with --bind or --rbind
on another path which the user will later chroot into. So this fix is
enabled only if the current process trying to access the snapshot is not
in a chroot context.

With the snapshot issue fixed for the root filesystem, using mount.zfs
in the ZFS initrd script mounts the datasets with the correct mount
options.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Ameer Hamza <ahamza@ixsystems.com>
Signed-off-by: Umer Saleem <usaleem@ixsystems.com>
Closes #16646
This commit is contained in:
Umer Saleem 2024-10-17 18:09:39 +05:00 committed by GitHub
parent 38a04f0a7c
commit 27e8f56102
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 111 additions and 10 deletions

View File

@ -344,7 +344,7 @@ mount_fs()
# Need the _original_ datasets mountpoint!
mountpoint=$(get_fs_value "$fs" mountpoint)
ZFS_CMD="mount -o zfsutil -t zfs"
ZFS_CMD="mount.zfs -o zfsutil"
if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
# Can't use the mountpoint property. Might be one of our
# clones. Check the 'org.zol:mountpoint' property set in
@ -359,9 +359,8 @@ mount_fs()
# isn't the root fs.
return 0
fi
# Don't use mount.zfs -o zfsutil for legacy mountpoint
if [ "$mountpoint" = "legacy" ]; then
ZFS_CMD="mount -t zfs"
ZFS_CMD="mount.zfs"
fi
# Last hail-mary: Hope 'rootmnt' is set!
mountpoint=""

View File

@ -69,6 +69,7 @@ typedef struct vfs {
boolean_t vfs_do_relatime;
boolean_t vfs_nbmand;
boolean_t vfs_do_nbmand;
kmutex_t vfs_mntpt_lock;
} vfs_t;
typedef struct zfs_mnt {

View File

@ -767,9 +767,6 @@ zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
uint64_t id, pos = 0;
int error = 0;
if (zfsvfs->z_vfs->vfs_mntpoint == NULL)
return (SET_ERROR(ENOENT));
cookie = spl_fstrans_mark();
snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
@ -786,8 +783,14 @@ zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
break;
}
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
zfsvfs->z_vfs->vfs_mntpoint, snapname);
} else
error = SET_ERROR(ENOENT);
mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
out:
kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
spl_fstrans_unmark(cookie);
@ -1049,6 +1052,66 @@ exportfs_flush(void)
(void) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
}
/*
 * Render the given struct path as a character string.
 *
 * The conversion is delegated to the kernel's d_path(), which is handed
 * a temporary scratch buffer of 'len' bytes. On success the string that
 * d_path() returns is copied into 'buff'; on failure 'buff' is left
 * untouched and the error reported by d_path() is returned as a
 * positive errno value.
 *
 * A reference on 'path' is taken for the duration of the call and
 * dropped again before returning.
 *
 * The copy into 'buff' is not bounds-checked here, so 'buff' is assumed
 * to provide at least 'len' bytes (NOTE(review): confirm at call sites).
 *
 * d_path() yields the correct path for mountpoints and for chroot
 * environments. The exception is a chroot environment containing
 * directories mounted with --bind or --rbind: there d_path() returns
 * the complete path as seen inside the chroot, but the leading path to
 * the chroot environment itself is missing, i.e. the result is not the
 * absolute path.
 */
static int
get_root_path(struct path *path, char *buff, int len)
{
	int rc = 0;
	char *scratch;
	char *resolved;

	path_get(path);
	scratch = kmem_zalloc(len, KM_SLEEP);

	resolved = d_path(path, scratch, len);
	if (!IS_ERR(resolved)) {
		strcpy(buff, resolved);
	} else {
		rc = SET_ERROR(-PTR_ERR(resolved));
	}

	kmem_free(scratch, len);
	path_put(path);

	return (rc);
}
/*
* Returns if the current process root is chrooted or not. Linux
* kernel exposes the task_struct for current process and init.
* Since init process root points to actual root filesystem when
* Linux runtime is reached, we can compare the current process
* root with init process root to determine if root of the current
* process is different from init, which can reliably determine if
* current process is in chroot context or not.
*/
static int
is_current_chrooted(void)
{
struct task_struct *curr = current, *global = &init_task;
struct path cr_root, gl_root;
task_lock(curr);
get_fs_root(curr->fs, &cr_root);
task_unlock(curr);
task_lock(global);
get_fs_root(global->fs, &gl_root);
task_unlock(global);
int chrooted = !path_equal(&cr_root, &gl_root);
path_put(&gl_root);
path_put(&cr_root);
return (chrooted);
}
/*
* Attempt to unmount a snapshot by making a call to user space.
* There is no assurance that this can or will succeed, is just a
@ -1123,14 +1186,50 @@ zfsctl_snapshot_mount(struct path *path, int flags)
if (error)
goto error;
if (is_current_chrooted() == 0) {
/*
* Current process is not in chroot context
*/
char *m = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
struct path mnt_path;
mnt_path.mnt = path->mnt;
mnt_path.dentry = path->mnt->mnt_root;
/*
* Get path to current mountpoint
*/
error = get_root_path(&mnt_path, m, MAXPATHLEN);
if (error != 0) {
kmem_free(m, MAXPATHLEN);
goto error;
}
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
/*
* If current mountpoint and vfs_mntpoint are not the same,
* store current mountpoint in vfs_mntpoint.
*/
if (strcmp(zfsvfs->z_vfs->vfs_mntpoint, m) != 0) {
kmem_strfree(zfsvfs->z_vfs->vfs_mntpoint);
zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
}
} else
zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
kmem_free(m, MAXPATHLEN);
}
/*
* Construct a mount point path from sb of the ctldir inode and dirent
* name, instead of from d_path(), so that chroot'd process doesn't fail
* on mount.zfs(8).
*/
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s",
zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "",
dname(dentry));
mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
snprintf(options, 7, "%s",
zfs_snapshot_no_setuid ? "nosuid" : "suid");

View File

@ -115,7 +115,7 @@ zfsvfs_vfs_free(vfs_t *vfsp)
if (vfsp != NULL) {
if (vfsp->vfs_mntpoint != NULL)
kmem_strfree(vfsp->vfs_mntpoint);
mutex_destroy(&vfsp->vfs_mntpt_lock);
kmem_free(vfsp, sizeof (vfs_t));
}
}
@ -197,10 +197,11 @@ zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp)
vfsp->vfs_do_nbmand = B_TRUE;
break;
case TOKEN_MNTPOINT:
if (vfsp->vfs_mntpoint != NULL)
kmem_strfree(vfsp->vfs_mntpoint);
vfsp->vfs_mntpoint = match_strdup(&args[0]);
if (vfsp->vfs_mntpoint == NULL)
return (SET_ERROR(ENOMEM));
break;
default:
break;
@ -219,6 +220,7 @@ zfsvfs_parse_options(char *mntopts, vfs_t **vfsp)
int error;
tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
mutex_init(&tmp_vfsp->vfs_mntpt_lock, NULL, MUTEX_DEFAULT, NULL);
if (mntopts != NULL) {
substring_t args[MAX_OPT_ARGS];