mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 02:44:41 +03:00
Add Linux namespace delegation support
This allows ZFS datasets to be delegated to a user/mount namespace.
Within that namespace, only the delegated datasets are visible.
This works very similarly to Zones/Jails on other ZFS OSes.

As a user:
```
$ unshare -Um
$ zfs list
no datasets available
$ echo $$
1234
```

As root:
```
# zfs list
NAME                            ZONED  MOUNTPOINT
containers                      off    /containers
containers/host                 off    /containers/host
containers/host/child           off    /containers/host/child
containers/host/child/gchild    off    /containers/host/child/gchild
containers/unpriv               on     /unpriv
containers/unpriv/child         on     /unpriv/child
containers/unpriv/child/gchild  on     /unpriv/child/gchild
# zfs zone /proc/1234/ns/user containers/unpriv
```

Back to the user namespace:
```
$ zfs list
NAME                     USED  AVAIL  REFER  MOUNTPOINT
containers               129M  47.8G  24K    /containers
containers/unpriv        128M  47.8G  24K    /unpriv
containers/unpriv/child  128M  47.8G  128M   /unpriv/child
```

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Will Andrews <will.andrews@klarasystems.com>
Signed-off-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Mateusz Piotrowski <mateusz.piotrowski@klarasystems.com>
Co-authored-by: Allan Jude <allan@klarasystems.com>
Co-authored-by: Mateusz Piotrowski <mateusz.piotrowski@klarasystems.com>
Sponsored-by: Buddy <https://buddy.works>
Closes #12263
This commit is contained in:
committed by
Brian Behlendorf
parent
a1aa8f14c8
commit
4ed5e25074
@@ -61,7 +61,7 @@ priv_policy_ns(const cred_t *cr, int capability, int err,
|
||||
/*
 * Thin wrapper around priv_policy_ns(): forwards the credential,
 * capability and error value unchanged and supplies the namespace
 * argument itself.
 *
 * NOTE(review): this is a rendered diff hunk (-61,7 +61,7) whose +/-
 * markers were lost. The two return statements below are presumably the
 * removed line (namespace argument NULL) followed by its replacement
 * (namespace argument cr->user_ns); only one of them exists in the real
 * file -- confirm against the repository.
 */
static int
|
||||
priv_policy(const cred_t *cr, int capability, int err)
|
||||
{
|
||||
return (priv_policy_ns(cr, capability, err, NULL));
|
||||
return (priv_policy_ns(cr, capability, err, cr->user_ns));
|
||||
}
|
||||
|
||||
static int
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
* Copyright 2017 RackTop Systems.
|
||||
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
* Copyright (c) 2019 Datto Inc.
|
||||
* Copyright (c) 2021 Klara, Inc.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
@@ -150,6 +151,48 @@ out:
|
||||
|
||||
}
|
||||
|
||||
/*
 * ioctl handler that attaches the dataset named in zc->zc_name to a user
 * namespace by calling zone_dataset_attach() with the caller's
 * credentials (CRED()) and zc->zc_cleanup_fd.
 *
 * NOTE(review): zc_cleanup_fd presumably carries the file descriptor of
 * the target namespace (e.g. an open /proc/<pid>/ns/user) -- confirm
 * against zone_dataset_attach().
 *
 * Returns EINVAL (via SET_ERROR) when zc is NULL. Otherwise returns the
 * result of zone_dataset_attach(), with ENOTTY rewritten to
 * ZFS_ERR_NOT_USER_NAMESPACE.
 */
static int
|
||||
zfs_ioc_userns_attach(zfs_cmd_t *zc)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (zc == NULL)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
error = zone_dataset_attach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
|
||||
|
||||
/*
|
||||
* Translate ENOTTY to ZFS_ERR_NOT_USER_NAMESPACE as we just arrived
|
||||
* back from the SPL layer, which does not know about ZFS_ERR_* errors.
|
||||
* See the comment at the user_ns_get() function in spl-zone.c for
|
||||
* details.
|
||||
*/
|
||||
if (error == ENOTTY)
|
||||
error = ZFS_ERR_NOT_USER_NAMESPACE;
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
 * ioctl handler that detaches the dataset named in zc->zc_name from a
 * user namespace by calling zone_dataset_detach() with the caller's
 * credentials (CRED()) and zc->zc_cleanup_fd.
 *
 * NOTE(review): zc_cleanup_fd presumably carries the file descriptor of
 * the namespace to detach from -- confirm against zone_dataset_detach().
 *
 * Returns EINVAL (via SET_ERROR) when zc is NULL. Otherwise returns the
 * result of zone_dataset_detach(), with ENOTTY rewritten to
 * ZFS_ERR_NOT_USER_NAMESPACE because the callee cannot return ZFS_ERR_*
 * values itself.
 */
static int
|
||||
zfs_ioc_userns_detach(zfs_cmd_t *zc)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (zc == NULL)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
error = zone_dataset_detach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
|
||||
|
||||
/*
|
||||
* See the comment in zfs_ioc_userns_attach() for details on what is
|
||||
* going on here.
|
||||
*/
|
||||
if (error == ENOTTY)
|
||||
error = ZFS_ERR_NOT_USER_NAMESPACE;
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
zfs_max_nvlist_src_size_os(void)
|
||||
{
|
||||
@@ -168,6 +211,10 @@ zfs_ioctl_update_mount_cache(const char *dsname)
|
||||
/*
 * Registers the user-namespace ioctls: ZFS_IOC_USERNS_ATTACH is bound to
 * zfs_ioc_userns_attach() and ZFS_IOC_USERNS_DETACH to
 * zfs_ioc_userns_detach(). Both go through
 * zfs_ioctl_register_dataset_nolog() and are given zfs_secpolicy_config
 * and POOL_CHECK_NONE.
 */
void
|
||||
zfs_ioctl_init_os(void)
|
||||
{
|
||||
zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_ATTACH,
|
||||
zfs_ioc_userns_attach, zfs_secpolicy_config, POOL_CHECK_NONE);
|
||||
zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_DETACH,
|
||||
zfs_ioc_userns_detach, zfs_secpolicy_config, POOL_CHECK_NONE);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
|
||||
@@ -1453,14 +1453,34 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
|
||||
int error = 0;
|
||||
zfsvfs_t *zfsvfs = NULL;
|
||||
vfs_t *vfs = NULL;
|
||||
int canwrite;
|
||||
int dataset_visible_zone;
|
||||
|
||||
ASSERT(zm);
|
||||
ASSERT(osname);
|
||||
|
||||
dataset_visible_zone = zone_dataset_visible(osname, &canwrite);
|
||||
|
||||
/*
|
||||
* Refuse to mount a filesystem if we are in a namespace and the
|
||||
* dataset is not visible or writable in that namespace.
|
||||
*/
|
||||
if (!INGLOBALZONE(curproc) &&
|
||||
(!dataset_visible_zone || !canwrite)) {
|
||||
return (SET_ERROR(EPERM));
|
||||
}
|
||||
|
||||
error = zfsvfs_parse_options(zm->mnt_data, &vfs);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* If a non-writable filesystem is being mounted without the
|
||||
* read-only flag, pretend it was set, as done for snapshots.
|
||||
*/
|
||||
if (!canwrite)
|
||||
vfs->vfs_readonly = true;
|
||||
|
||||
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
|
||||
if (error) {
|
||||
zfsvfs_vfs_free(vfs);
|
||||
|
||||
@@ -360,6 +360,7 @@ const struct super_operations zpl_super_operations = {
|
||||
/*
 * Filesystem type descriptor: named ZFS_DRIVER, owned by this module,
 * with zpl_mount as the mount callback and zpl_kill_sb as the superblock
 * teardown callback.
 *
 * NOTE(review): .fs_flags = FS_USERNS_MOUNT appears to be the line added
 * by this hunk (-360,6 +360,7). In the Linux VFS that flag allows the
 * filesystem to be mounted by root of a non-initial user namespace --
 * confirm against include/linux/fs.h for the targeted kernel versions.
 */
struct file_system_type zpl_fs_type = {
|
||||
.owner = THIS_MODULE,
|
||||
.name = ZFS_DRIVER,
|
||||
.fs_flags = FS_USERNS_MOUNT,
|
||||
.mount = zpl_mount,
|
||||
.kill_sb = zpl_kill_sb,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user