mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-24 18:39:34 +03:00
Add Linux namespace delegation support
This allows ZFS datasets to be delegated to a user/mount namespace. Within that namespace, only the delegated datasets are visible. Works very similarly to Zones/Jails on other ZFS OSes. As a user: ``` $ unshare -Um $ zfs list no datasets available $ echo $$ 1234 ``` As root: ``` # zfs list NAME ZONED MOUNTPOINT containers off /containers containers/host off /containers/host containers/host/child off /containers/host/child containers/host/child/gchild off /containers/host/child/gchild containers/unpriv on /unpriv containers/unpriv/child on /unpriv/child containers/unpriv/child/gchild on /unpriv/child/gchild # zfs zone /proc/1234/ns/user containers/unpriv ``` Back to the user namespace: ``` $ zfs list NAME USED AVAIL REFER MOUNTPOINT containers 129M 47.8G 24K /containers containers/unpriv 128M 47.8G 24K /unpriv containers/unpriv/child 128M 47.8G 128M /unpriv/child ``` Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Will Andrews <will.andrews@klarasystems.com> Signed-off-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Mateusz Piotrowski <mateusz.piotrowski@klarasystems.com> Co-authored-by: Allan Jude <allan@klarasystems.com> Co-authored-by: Mateusz Piotrowski <mateusz.piotrowski@klarasystems.com> Sponsored-by: Buddy <https://buddy.works> Closes #12263
This commit is contained in:
parent
a1aa8f14c8
commit
4ed5e25074
@ -127,6 +127,11 @@ static int zfs_do_jail(int argc, char **argv);
|
||||
static int zfs_do_unjail(int argc, char **argv);
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
static int zfs_do_zone(int argc, char **argv);
|
||||
static int zfs_do_unzone(int argc, char **argv);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
|
||||
*/
|
||||
@ -184,6 +189,8 @@ typedef enum {
|
||||
HELP_JAIL,
|
||||
HELP_UNJAIL,
|
||||
HELP_WAIT,
|
||||
HELP_ZONE,
|
||||
HELP_UNZONE,
|
||||
} zfs_help_t;
|
||||
|
||||
typedef struct zfs_command {
|
||||
@ -254,6 +261,11 @@ static zfs_command_t command_table[] = {
|
||||
{ "jail", zfs_do_jail, HELP_JAIL },
|
||||
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
{ "zone", zfs_do_zone, HELP_ZONE },
|
||||
{ "unzone", zfs_do_unzone, HELP_UNZONE },
|
||||
#endif
|
||||
};
|
||||
|
||||
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
|
||||
@ -415,6 +427,10 @@ get_usage(zfs_help_t idx)
|
||||
return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
|
||||
case HELP_WAIT:
|
||||
return (gettext("\twait [-t <activity>] <filesystem>\n"));
|
||||
case HELP_ZONE:
|
||||
return (gettext("\tzone <nsfile> <filesystem>\n"));
|
||||
case HELP_UNZONE:
|
||||
return (gettext("\tunzone <nsfile> <filesystem>\n"));
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
}
|
||||
@ -8692,6 +8708,50 @@ main(int argc, char **argv)
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs zone nsfile filesystem
|
||||
*
|
||||
* Add or delete the given dataset to/from the namespace.
|
||||
*/
|
||||
#ifdef __linux__
|
||||
static int
|
||||
zfs_do_zone_impl(int argc, char **argv, boolean_t attach)
|
||||
{
|
||||
zfs_handle_t *zhp;
|
||||
int ret;
|
||||
|
||||
if (argc < 3) {
|
||||
(void) fprintf(stderr, gettext("missing argument(s)\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
if (argc > 3) {
|
||||
(void) fprintf(stderr, gettext("too many arguments\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
|
||||
zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM);
|
||||
if (zhp == NULL)
|
||||
return (1);
|
||||
|
||||
ret = (zfs_userns(zhp, argv[1], attach) != 0);
|
||||
|
||||
zfs_close(zhp);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_do_zone(int argc, char **argv)
|
||||
{
|
||||
return (zfs_do_zone_impl(argc, argv, B_TRUE));
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_do_unzone(int argc, char **argv)
|
||||
{
|
||||
return (zfs_do_zone_impl(argc, argv, B_FALSE));
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#include <sys/jail.h>
|
||||
#include <jail.h>
|
||||
|
23
config/kernel-user-ns-inum.m4
Normal file
23
config/kernel-user-ns-inum.m4
Normal file
@ -0,0 +1,23 @@
|
||||
dnl #
|
||||
dnl # 3.18 API change
|
||||
dnl # struct user_namespace inum moved from .proc_inum to .ns.inum.
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [
|
||||
ZFS_LINUX_TEST_SRC([user_ns_common_inum], [
|
||||
#include <linux/user_namespace.h>
|
||||
], [
|
||||
struct user_namespace uns;
|
||||
uns.ns.inum = 0;
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [
|
||||
AC_MSG_CHECKING([whether user_namespace->ns.inum exists])
|
||||
ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1,
|
||||
[user_namespace->ns.inum exists])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
@ -145,6 +145,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
ZFS_AC_KERNEL_SRC_KTHREAD
|
||||
ZFS_AC_KERNEL_SRC_ZERO_PAGE
|
||||
ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
|
||||
ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
|
||||
|
||||
AC_MSG_CHECKING([for available kernel interfaces])
|
||||
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
|
||||
@ -263,6 +264,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||
ZFS_AC_KERNEL_KTHREAD
|
||||
ZFS_AC_KERNEL_ZERO_PAGE
|
||||
ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
|
||||
ZFS_AC_KERNEL_USER_NS_COMMON_INUM
|
||||
])
|
||||
|
||||
dnl #
|
||||
|
@ -100,6 +100,7 @@ zfs_errno = enum_with_offset(1024, [
|
||||
'ZFS_ERR_REBUILD_IN_PROGRESS',
|
||||
'ZFS_ERR_BADPROP',
|
||||
'ZFS_ERR_VDEV_NOTSUP',
|
||||
'ZFS_ERR_NOT_USER_NAMESPACE',
|
||||
],
|
||||
{}
|
||||
)
|
||||
|
@ -150,6 +150,7 @@ typedef enum zfs_error {
|
||||
EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */
|
||||
EZFS_REBUILDING, /* resilvering (sequential reconstrution) */
|
||||
EZFS_VDEV_NOTSUP, /* ops not supported for this type of vdev */
|
||||
EZFS_NOT_USER_NAMESPACE, /* a file is not a user namespace */
|
||||
EZFS_UNKNOWN
|
||||
} zfs_error_t;
|
||||
|
||||
@ -979,6 +980,15 @@ _LIBZFS_H int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t,
|
||||
|
||||
#endif /* __FreeBSD__ */
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
/*
|
||||
* Add or delete the given filesystem to/from the given user namespace.
|
||||
*/
|
||||
_LIBZFS_H int zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -25,11 +25,34 @@
|
||||
#define _SPL_ZONE_H
|
||||
|
||||
#include <sys/byteorder.h>
|
||||
#include <sys/cred.h>
|
||||
|
||||
#define GLOBAL_ZONEID 0
|
||||
#include <linux/cred.h>
|
||||
#include <linux/user_namespace.h>
|
||||
|
||||
#define zone_dataset_visible(x, y) (1)
|
||||
#define crgetzoneid(x) (GLOBAL_ZONEID)
|
||||
#define INGLOBALZONE(z) (1)
|
||||
/*
|
||||
* Attach the given dataset to the given user namespace.
|
||||
*/
|
||||
extern int zone_dataset_attach(cred_t *, const char *, int);
|
||||
|
||||
/*
|
||||
* Detach the given dataset from the given user namespace.
|
||||
*/
|
||||
extern int zone_dataset_detach(cred_t *, const char *, int);
|
||||
|
||||
/*
|
||||
* Returns true if the named pool/dataset is visible in the current zone.
|
||||
*/
|
||||
extern int zone_dataset_visible(const char *dataset, int *write);
|
||||
|
||||
int spl_zone_init(void);
|
||||
void spl_zone_fini(void);
|
||||
|
||||
extern unsigned int crgetzoneid(const cred_t *);
|
||||
extern unsigned int global_zoneid(void);
|
||||
extern boolean_t inglobalzone(proc_t *);
|
||||
|
||||
#define INGLOBALZONE(x) inglobalzone(x)
|
||||
#define GLOBAL_ZONEID global_zoneid()
|
||||
|
||||
#endif /* SPL_ZONE_H */
|
||||
|
@ -1450,7 +1450,9 @@ typedef enum zfs_ioc {
|
||||
ZFS_IOC_EVENTS_SEEK, /* 0x83 (Linux) */
|
||||
ZFS_IOC_NEXTBOOT, /* 0x84 (FreeBSD) */
|
||||
ZFS_IOC_JAIL, /* 0x85 (FreeBSD) */
|
||||
ZFS_IOC_USERNS_ATTACH = ZFS_IOC_JAIL, /* 0x85 (Linux) */
|
||||
ZFS_IOC_UNJAIL, /* 0x86 (FreeBSD) */
|
||||
ZFS_IOC_USERNS_DETACH = ZFS_IOC_UNJAIL, /* 0x86 (Linux) */
|
||||
ZFS_IOC_SET_BOOTENV, /* 0x87 */
|
||||
ZFS_IOC_GET_BOOTENV, /* 0x88 */
|
||||
ZFS_IOC_LAST
|
||||
@ -1531,6 +1533,7 @@ typedef enum {
|
||||
ZFS_ERR_REBUILD_IN_PROGRESS,
|
||||
ZFS_ERR_BADPROP,
|
||||
ZFS_ERR_VDEV_NOTSUP,
|
||||
ZFS_ERR_NOT_USER_NAMESPACE,
|
||||
} zfs_errno_t;
|
||||
|
||||
/*
|
||||
|
@ -44,7 +44,7 @@
|
||||
#include <inttypes.h>
|
||||
#endif /* HAVE_INTTYPES */
|
||||
|
||||
typedef int zoneid_t;
|
||||
typedef uint_t zoneid_t;
|
||||
typedef int projid_t;
|
||||
|
||||
/*
|
||||
|
@ -33,7 +33,17 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GLOBAL_ZONEID 0
|
||||
#ifdef __FreeBSD__
|
||||
#define GLOBAL_ZONEID 0
|
||||
#else
|
||||
/*
|
||||
* Hardcoded in the kernel's root user namespace. A "better" way to get
|
||||
* this would be by using ioctl_ns(2), but this would need to be performed
|
||||
* recursively on NS_GET_PARENT and then NS_GET_USERNS. Also, that's only
|
||||
* supported since Linux 4.9.
|
||||
*/
|
||||
#define GLOBAL_ZONEID 4026531837U
|
||||
#endif
|
||||
|
||||
extern zoneid_t getzoneid(void);
|
||||
|
||||
|
@ -23,10 +23,40 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <zone.h>
|
||||
|
||||
zoneid_t
|
||||
getzoneid(void)
|
||||
{
|
||||
return (GLOBAL_ZONEID);
|
||||
char path[PATH_MAX];
|
||||
char buf[128] = { '\0' };
|
||||
char *cp;
|
||||
|
||||
int c = snprintf(path, sizeof (path), "/proc/self/ns/user");
|
||||
/* This API doesn't have any error checking... */
|
||||
if (c < 0)
|
||||
return (0);
|
||||
|
||||
ssize_t r = readlink(path, buf, sizeof (buf) - 1);
|
||||
if (r < 0)
|
||||
return (0);
|
||||
|
||||
cp = strchr(buf, '[');
|
||||
if (cp == NULL)
|
||||
return (0);
|
||||
cp++;
|
||||
|
||||
unsigned long n = strtoul(cp, NULL, 10);
|
||||
if (n == ULONG_MAX && errno == ERANGE)
|
||||
return (0);
|
||||
zoneid_t z = (zoneid_t)n;
|
||||
|
||||
return (z);
|
||||
}
|
||||
|
@ -1081,7 +1081,7 @@
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
|
||||
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
|
||||
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
|
||||
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
|
||||
<return type-id='4da03624'/>
|
||||
</function-decl>
|
||||
|
@ -433,6 +433,7 @@
|
||||
<elf-symbol name='zfs_unmountall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_unshare' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_unshareall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_userns' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_userspace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_valid_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_version_kernel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -1537,7 +1538,7 @@
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='lib/libspl/os/linux/zone.c' language='LANG_C99'>
|
||||
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
|
||||
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
|
||||
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
|
||||
<return type-id='4da03624'/>
|
||||
</function-decl>
|
||||
@ -4414,6 +4415,12 @@
|
||||
<function-decl name='zfs_version_kernel' mangled-name='zfs_version_kernel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_kernel'>
|
||||
<return type-id='26a90f95'/>
|
||||
</function-decl>
|
||||
<function-decl name='zfs_userns' mangled-name='zfs_userns' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_userns'>
|
||||
<parameter type-id='9200a744' name='zhp'/>
|
||||
<parameter type-id='80f4b756' name='nspath'/>
|
||||
<parameter type-id='95e97e5e' name='attach'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='lib/libzutil/os/linux/zutil_device_path_os.c' language='LANG_C99'>
|
||||
<function-decl name='zfs_append_partition' mangled-name='zfs_append_partition' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_append_partition'>
|
||||
|
@ -299,6 +299,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
||||
case EZFS_VDEV_NOTSUP:
|
||||
return (dgettext(TEXT_DOMAIN, "operation not supported "
|
||||
"on this type of vdev"));
|
||||
case EZFS_NOT_USER_NAMESPACE:
|
||||
return (dgettext(TEXT_DOMAIN, "the provided file "
|
||||
"was not a user namespace file"));
|
||||
case EZFS_UNKNOWN:
|
||||
return (dgettext(TEXT_DOMAIN, "unknown error"));
|
||||
default:
|
||||
@ -485,6 +488,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
|
||||
case ZFS_ERR_BADPROP:
|
||||
zfs_verror(hdl, EZFS_BADPROP, fmt, ap);
|
||||
break;
|
||||
case ZFS_ERR_NOT_USER_NAMESPACE:
|
||||
zfs_verror(hdl, EZFS_NOT_USER_NAMESPACE, fmt, ap);
|
||||
break;
|
||||
default:
|
||||
zfs_error_aux(hdl, "%s", strerror(error));
|
||||
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
|
||||
|
@ -19,6 +19,9 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021 Klara, Inc.
|
||||
*/
|
||||
|
||||
#include <alloca.h>
#include <errno.h>
#include <unistd.h>
|
||||
@ -207,3 +210,69 @@ zfs_version_kernel(void)
|
||||
ret[read - 1] = '\0';
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add or delete the given filesystem to/from the given user namespace.
|
||||
*/
|
||||
int
|
||||
zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach)
|
||||
{
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char errbuf[1024];
|
||||
unsigned long cmd;
|
||||
int ret;
|
||||
|
||||
if (attach) {
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot add '%s' to namespace"),
|
||||
zhp->zfs_name);
|
||||
} else {
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot remove '%s' from namespace"),
|
||||
zhp->zfs_name);
|
||||
}
|
||||
|
||||
switch (zhp->zfs_type) {
|
||||
case ZFS_TYPE_VOLUME:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"volumes can not be namespaced"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_SNAPSHOT:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"snapshots can not be namespaced"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_BOOKMARK:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"bookmarks can not be namespaced"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_VDEV:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"vdevs can not be namespaced"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_INVALID:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"invalid zfs_type_t: ZFS_TYPE_INVALID"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_POOL:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"pools can not be namespaced"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_FILESYSTEM:
|
||||
zfs_fallthrough;
|
||||
}
|
||||
assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
|
||||
zc.zc_objset_type = DMU_OST_ZFS;
|
||||
zc.zc_cleanup_fd = open(nspath, O_RDONLY);
|
||||
if (zc.zc_cleanup_fd < 0) {
|
||||
return (zfs_error(hdl, EZFS_NOT_USER_NAMESPACE, errbuf));
|
||||
}
|
||||
|
||||
cmd = attach ? ZFS_IOC_USERNS_ATTACH : ZFS_IOC_USERNS_DETACH;
|
||||
if ((ret = zfs_ioctl(hdl, cmd, &zc)) != 0)
|
||||
zfs_standard_error(hdl, errno, errbuf);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
@ -939,7 +939,7 @@
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
|
||||
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
|
||||
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
|
||||
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
|
||||
<return type-id='4da03624'/>
|
||||
</function-decl>
|
||||
|
@ -59,9 +59,11 @@ dist_man_MANS = \
|
||||
%D%/man8/zfs-unjail.8 \
|
||||
%D%/man8/zfs-unload-key.8 \
|
||||
%D%/man8/zfs-unmount.8 \
|
||||
%D%/man8/zfs-unzone.8 \
|
||||
%D%/man8/zfs-upgrade.8 \
|
||||
%D%/man8/zfs-userspace.8 \
|
||||
%D%/man8/zfs-wait.8 \
|
||||
%D%/man8/zfs-zone.8 \
|
||||
%D%/man8/zfs_ids_to_path.8 \
|
||||
%D%/man8/zgenhostid.8 \
|
||||
%D%/man8/zinject.8 \
|
||||
|
@ -1885,8 +1885,7 @@ feature and are not relevant on other platforms.
|
||||
The default value is
|
||||
.Sy off .
|
||||
.It Sy zoned Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether the dataset is managed from a non-global zone.
|
||||
Zones are a Solaris feature and are not relevant on other platforms.
|
||||
Controls whether the dataset is managed from a non-global zone or namespace.
|
||||
The default value is
|
||||
.Sy off .
|
||||
.El
|
||||
|
1
man/man8/zfs-unzone.8
Symbolic link
1
man/man8/zfs-unzone.8
Symbolic link
@ -0,0 +1 @@
|
||||
zfs-zone.8
|
116
man/man8/zfs-zone.8
Normal file
116
man/man8/zfs-zone.8
Normal file
@ -0,0 +1,116 @@
|
||||
.\"
|
||||
.\" CDDL HEADER START
|
||||
.\"
|
||||
.\" The contents of this file are subject to the terms of the
|
||||
.\" Common Development and Distribution License (the "License").
|
||||
.\" You may not use this file except in compliance with the License.
|
||||
.\"
|
||||
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
.\" or http://www.opensolaris.org/os/licensing.
|
||||
.\" See the License for the specific language governing permissions
|
||||
.\" and limitations under the License.
|
||||
.\"
|
||||
.\" When distributing Covered Code, include this CDDL HEADER in each
|
||||
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
.\" If applicable, add the following below this CDDL HEADER, with the
|
||||
.\" fields enclosed by brackets "[]" replaced with your own identifying
|
||||
.\" information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
.\"
|
||||
.\" CDDL HEADER END
|
||||
.\"
|
||||
.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
|
||||
.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
|
||||
.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
|
||||
.\" Copyright (c) 2011, Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
.\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org>
|
||||
.\" Copyright (c) 2012, Bryan Drewery <bdrewery@FreeBSD.org>
|
||||
.\" Copyright (c) 2013, Steven Hartland <smh@FreeBSD.org>
|
||||
.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
|
||||
.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
|
||||
.\" Copyright (c) 2014 Integros [integros.com]
|
||||
.\" Copyright (c) 2014, Xin LI <delphij@FreeBSD.org>
|
||||
.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved.
|
||||
.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved.
|
||||
.\" Copyright 2019 Richard Laager. All rights reserved.
|
||||
.\" Copyright 2018 Nexenta Systems, Inc.
|
||||
.\" Copyright 2019 Joyent, Inc.
|
||||
.\" Copyright 2021 Klara, Inc.
|
||||
.\"
|
||||
.Dd June 3, 2022
|
||||
.Dt ZFS-ZONE 8
|
||||
.Os
|
||||
.
|
||||
.Sh NAME
|
||||
.Nm zfs-zone ,
|
||||
.Nm zfs-unzone
|
||||
.Nd attach and detach ZFS filesystems to user namespaces
|
||||
.Sh SYNOPSIS
|
||||
.Nm zfs Cm zone
|
||||
.Ar nsfile
|
||||
.Ar filesystem
|
||||
.Nm zfs Cm unzone
|
||||
.Ar nsfile
|
||||
.Ar filesystem
|
||||
.
|
||||
.Sh DESCRIPTION
|
||||
.Bl -tag -width ""
|
||||
.It Xo
|
||||
.Nm zfs
|
||||
.Cm zone
|
||||
.Ar nsfile
|
||||
.Ar filesystem
|
||||
.Xc
|
||||
Attach the specified
|
||||
.Ar filesystem
|
||||
to the user namespace identified by
|
||||
.Ar nsfile .
|
||||
From now on this file system tree can be managed from within a user namespace
|
||||
if the
|
||||
.Sy zoned
|
||||
property has been set.
|
||||
.Pp
|
||||
You cannot attach a zoned dataset's children to another user namespace.
|
||||
You also cannot attach the root file system
|
||||
of the user namespace or any dataset
|
||||
which needs to be mounted before the zfs service
|
||||
is run inside the user namespace,
|
||||
as it would be attached unmounted until it is
|
||||
mounted from the service inside the user namespace.
|
||||
.Pp
|
||||
To allow management of the dataset from within a user namespace, the
|
||||
.Sy zoned
|
||||
property has to be set and the user namespace needs access to the
|
||||
.Pa /dev/zfs
|
||||
device.
|
||||
The
|
||||
.Sy quota
|
||||
property cannot be changed from within a user namespace.
|
||||
.Pp
|
||||
After a dataset is attached to a user namespace and the
|
||||
.Sy zoned
|
||||
property is set,
|
||||
a zoned file system cannot be mounted outside the user namespace,
|
||||
since the user namespace administrator might have set the mount point
|
||||
to an unacceptable value.
|
||||
.It Xo
|
||||
.Nm zfs
|
||||
.Cm unzone
|
||||
.Ar nsfile
|
||||
.Ar filesystem
|
||||
.Xc
|
||||
Detach the specified
|
||||
.Ar filesystem
|
||||
from the user namespace identified by
|
||||
.Ar nsfile .
|
||||
.El
|
||||
.Sh EXAMPLES
|
||||
.Ss Example 1 : No Delegating a Dataset to a User Namespace
|
||||
The following example delegates the
|
||||
.Ar tank/users
|
||||
dataset to a user namespace identified by user namespace file
|
||||
.Pa /proc/1234/ns/user .
|
||||
.Dl # Nm zfs Cm zone Ar /proc/1234/ns/user Ar tank/users
|
||||
.
|
||||
.Sh SEE ALSO
|
||||
.Xr zfsprops 7
|
@ -65,7 +65,8 @@ SPL_OBJS := \
|
||||
spl-tsd.o \
|
||||
spl-vmem.o \
|
||||
spl-xdr.o \
|
||||
spl-zlib.o
|
||||
spl-zlib.o \
|
||||
spl-zone.o
|
||||
|
||||
spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS))
|
||||
|
||||
|
@ -780,8 +780,13 @@ spl_init(void)
|
||||
if ((rc = spl_zlib_init()))
|
||||
goto out7;
|
||||
|
||||
if ((rc = spl_zone_init()))
|
||||
goto out8;
|
||||
|
||||
return (rc);
|
||||
|
||||
out8:
|
||||
spl_zlib_fini();
|
||||
out7:
|
||||
spl_kstat_fini();
|
||||
out6:
|
||||
@ -801,6 +806,7 @@ out1:
|
||||
static void __exit
|
||||
spl_fini(void)
|
||||
{
|
||||
spl_zone_fini();
|
||||
spl_zlib_fini();
|
||||
spl_kstat_fini();
|
||||
spl_proc_fini();
|
||||
|
424
module/os/linux/spl/spl-zone.c
Normal file
424
module/os/linux/spl/spl-zone.c
Normal file
@ -0,0 +1,424 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Klara Systems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/magic.h>
|
||||
#include <sys/zone.h>
|
||||
|
||||
#if defined(CONFIG_USER_NS)
|
||||
#include <linux/statfs.h>
|
||||
#include <linux/proc_ns.h>
|
||||
#endif
|
||||
|
||||
static kmutex_t zone_datasets_lock;
|
||||
static struct list_head zone_datasets;
|
||||
|
||||
typedef struct zone_datasets {
|
||||
struct list_head zds_list; /* zone_datasets linkage */
|
||||
struct user_namespace *zds_userns; /* namespace reference */
|
||||
struct list_head zds_datasets; /* datasets for the namespace */
|
||||
} zone_datasets_t;
|
||||
|
||||
typedef struct zone_dataset {
|
||||
struct list_head zd_list; /* zone_dataset linkage */
|
||||
size_t zd_dsnamelen; /* length of name */
|
||||
char zd_dsname[0]; /* name of the member dataset */
|
||||
} zone_dataset_t;
|
||||
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
||||
/*
|
||||
* Returns:
|
||||
* - 0 on success
|
||||
* - EBADF if it cannot open the provided file descriptor
|
||||
* - ENOTTY if the file itself is a not a user namespace file. We want to
|
||||
* intercept this error in the ZFS layer. We cannot just return one of the
|
||||
* ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS
|
||||
* and the SPL layers.
|
||||
*/
|
||||
static int
|
||||
user_ns_get(int fd, struct user_namespace **userns)
|
||||
{
|
||||
struct kstatfs st;
|
||||
struct file *nsfile;
|
||||
struct ns_common *ns;
|
||||
int error;
|
||||
|
||||
if ((nsfile = fget(fd)) == NULL)
|
||||
return (EBADF);
|
||||
if (vfs_statfs(&nsfile->f_path, &st) != 0) {
|
||||
error = ENOTTY;
|
||||
goto done;
|
||||
}
|
||||
if (st.f_type != NSFS_MAGIC) {
|
||||
error = ENOTTY;
|
||||
goto done;
|
||||
}
|
||||
ns = get_proc_ns(file_inode(nsfile));
|
||||
if (ns->ops->type != CLONE_NEWUSER) {
|
||||
error = ENOTTY;
|
||||
goto done;
|
||||
}
|
||||
*userns = container_of(ns, struct user_namespace, ns);
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
fput(nsfile);
|
||||
|
||||
return (error);
|
||||
}
|
||||
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
||||
|
||||
static unsigned int
|
||||
user_ns_zoneid(struct user_namespace *user_ns)
|
||||
{
|
||||
unsigned int r;
|
||||
|
||||
#if defined(HAVE_USER_NS_COMMON_INUM)
|
||||
r = user_ns->ns.inum;
|
||||
#else
|
||||
r = user_ns->proc_inum;
|
||||
#endif
|
||||
|
||||
return (r);
|
||||
}
|
||||
|
||||
static struct zone_datasets *
|
||||
zone_datasets_lookup(unsigned int nsinum)
|
||||
{
|
||||
zone_datasets_t *zds;
|
||||
|
||||
list_for_each_entry(zds, &zone_datasets, zds_list) {
|
||||
if (user_ns_zoneid(zds->zds_userns) == nsinum)
|
||||
return (zds);
|
||||
}
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
||||
static struct zone_dataset *
|
||||
zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
|
||||
{
|
||||
zone_dataset_t *zd;
|
||||
|
||||
list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
|
||||
if (zd->zd_dsnamelen != dsnamelen)
|
||||
continue;
|
||||
if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
|
||||
return (zd);
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static int
|
||||
zone_dataset_cred_check(cred_t *cred)
|
||||
{
|
||||
|
||||
if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
|
||||
return (EPERM);
|
||||
|
||||
return (0);
|
||||
}
|
||||
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
||||
|
||||
/*
 * Validate a dataset name and compute its effective length.
 *
 * A name that is empty or begins with '/' is rejected with ENOENT and
 * *dsnamelen is left untouched.  Otherwise 0 is returned and *dsnamelen is
 * set to the length of the name, not counting one trailing '/' if present.
 */
static int
zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
{
	size_t len;

	switch (dataset[0]) {
	case '\0':
	case '/':
		return (ENOENT);
	default:
		break;
	}

	len = strlen(dataset);
	/* A single trailing slash, if supplied, is not part of the name. */
	if (dataset[len - 1] == '/')
		len--;
	*dsnamelen = len;

	return (0);
}
|
||||
|
||||
int
|
||||
zone_dataset_attach(cred_t *cred, const char *dataset, int cleanup_fd)
|
||||
{
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
||||
struct user_namespace *userns;
|
||||
zone_datasets_t *zds;
|
||||
zone_dataset_t *zd;
|
||||
int error;
|
||||
size_t dsnamelen;
|
||||
|
||||
if ((error = zone_dataset_cred_check(cred)) != 0)
|
||||
return (error);
|
||||
if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
|
||||
return (error);
|
||||
if ((error = user_ns_get(cleanup_fd, &userns)) != 0)
|
||||
return (error);
|
||||
|
||||
mutex_enter(&zone_datasets_lock);
|
||||
zds = zone_datasets_lookup(user_ns_zoneid(userns));
|
||||
if (zds == NULL) {
|
||||
zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
|
||||
INIT_LIST_HEAD(&zds->zds_list);
|
||||
INIT_LIST_HEAD(&zds->zds_datasets);
|
||||
zds->zds_userns = userns;
|
||||
/*
|
||||
* Lock the namespace by incresing its refcount to prevent
|
||||
* the namespace ID from being reused.
|
||||
*/
|
||||
get_user_ns(userns);
|
||||
list_add_tail(&zds->zds_list, &zone_datasets);
|
||||
} else {
|
||||
zd = zone_dataset_lookup(zds, dataset, dsnamelen);
|
||||
if (zd != NULL) {
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (EEXIST);
|
||||
}
|
||||
}
|
||||
|
||||
zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
|
||||
zd->zd_dsnamelen = dsnamelen;
|
||||
strncpy(zd->zd_dsname, dataset, dsnamelen);
|
||||
zd->zd_dsname[dsnamelen] = '\0';
|
||||
INIT_LIST_HEAD(&zd->zd_list);
|
||||
list_add_tail(&zd->zd_list, &zds->zds_datasets);
|
||||
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (0);
|
||||
#else
|
||||
return (ENXIO);
|
||||
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
||||
}
|
||||
EXPORT_SYMBOL(zone_dataset_attach);
|
||||
|
||||
int
|
||||
zone_dataset_detach(cred_t *cred, const char *dataset, int cleanup_fd)
|
||||
{
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
||||
struct user_namespace *userns;
|
||||
zone_datasets_t *zds;
|
||||
zone_dataset_t *zd;
|
||||
int error;
|
||||
size_t dsnamelen;
|
||||
|
||||
if ((error = zone_dataset_cred_check(cred)) != 0)
|
||||
return (error);
|
||||
if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
|
||||
return (error);
|
||||
if ((error = user_ns_get(cleanup_fd, &userns)) != 0)
|
||||
return (error);
|
||||
|
||||
mutex_enter(&zone_datasets_lock);
|
||||
zds = zone_datasets_lookup(user_ns_zoneid(userns));
|
||||
if (zds != NULL)
|
||||
zd = zone_dataset_lookup(zds, dataset, dsnamelen);
|
||||
if (zds == NULL || zd == NULL) {
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
list_del(&zd->zd_list);
|
||||
kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
|
||||
|
||||
/* Prune the namespace entry if it has no more delegations. */
|
||||
if (list_empty(&zds->zds_datasets)) {
|
||||
/*
|
||||
* Decrease the refcount now that the namespace is no longer
|
||||
* used. It is no longer necessary to prevent the namespace ID
|
||||
* from being reused.
|
||||
*/
|
||||
put_user_ns(userns);
|
||||
list_del(&zds->zds_list);
|
||||
kmem_free(zds, sizeof (*zds));
|
||||
}
|
||||
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (0);
|
||||
#else
|
||||
return (ENXIO);
|
||||
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
||||
}
|
||||
EXPORT_SYMBOL(zone_dataset_detach);
|
||||
|
||||
/*
|
||||
* A dataset is visible if:
|
||||
* - It is a parent of a namespace entry.
|
||||
* - It is one of the namespace entries.
|
||||
* - It is a child of a namespace entry.
|
||||
*
|
||||
* A dataset is writable if:
|
||||
* - It is one of the namespace entries.
|
||||
* - It is a child of a namespace entry.
|
||||
*
|
||||
* The parent datasets of namespace entries are visible and
|
||||
* read-only to provide a path back to the root of the pool.
|
||||
*/
|
||||
int
|
||||
zone_dataset_visible(const char *dataset, int *write)
|
||||
{
|
||||
zone_datasets_t *zds;
|
||||
zone_dataset_t *zd;
|
||||
size_t dsnamelen, zd_len;
|
||||
int visible;
|
||||
|
||||
/* Default to read-only, in case visible is returned. */
|
||||
if (write != NULL)
|
||||
*write = 0;
|
||||
if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
|
||||
return (0);
|
||||
if (INGLOBALZONE(curproc)) {
|
||||
if (write != NULL)
|
||||
*write = 1;
|
||||
return (1);
|
||||
}
|
||||
|
||||
mutex_enter(&zone_datasets_lock);
|
||||
zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
|
||||
if (zds == NULL) {
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
visible = 0;
|
||||
list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
|
||||
zd_len = strlen(zd->zd_dsname);
|
||||
if (zd_len > dsnamelen) {
|
||||
/*
|
||||
* The name of the namespace entry is longer than that
|
||||
* of the dataset, so it could be that the dataset is a
|
||||
* parent of the namespace entry.
|
||||
*/
|
||||
visible = memcmp(zd->zd_dsname, dataset,
|
||||
dsnamelen) == 0 &&
|
||||
zd->zd_dsname[dsnamelen] == '/';
|
||||
if (visible)
|
||||
break;
|
||||
} else if (zd_len == dsnamelen) {
|
||||
/*
|
||||
* The name of the namespace entry is as long as that
|
||||
* of the dataset, so perhaps the dataset itself is the
|
||||
* namespace entry.
|
||||
*/
|
||||
visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
|
||||
if (visible) {
|
||||
if (write != NULL)
|
||||
*write = 1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* The name of the namespace entry is shorter than that
|
||||
* of the dataset, so perhaps the dataset is a child of
|
||||
* the namespace entry.
|
||||
*/
|
||||
visible = memcmp(zd->zd_dsname, dataset,
|
||||
zd_len) == 0 && dataset[zd_len] == '/';
|
||||
if (visible) {
|
||||
if (write != NULL)
|
||||
*write = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (visible);
|
||||
}
|
||||
EXPORT_SYMBOL(zone_dataset_visible);
|
||||
|
||||
/*
 * Return the zone ID of the initial user namespace (init_user_ns), or 0
 * when the kernel is built without CONFIG_USER_NS.
 */
unsigned int
global_zoneid(void)
{
#if defined(CONFIG_USER_NS)
	return (user_ns_zoneid(&init_user_ns));
#else
	return (0);
#endif
}
|
||||
EXPORT_SYMBOL(global_zoneid);
|
||||
|
||||
unsigned int
|
||||
crgetzoneid(const cred_t *cr)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if defined(CONFIG_USER_NS)
|
||||
r = user_ns_zoneid(cr->user_ns);
|
||||
#endif
|
||||
|
||||
return (r);
|
||||
}
|
||||
EXPORT_SYMBOL(crgetzoneid);
|
||||
|
||||
boolean_t
|
||||
inglobalzone(proc_t *proc)
|
||||
{
|
||||
#if defined(CONFIG_USER_NS)
|
||||
return (proc->cred->user_ns == &init_user_ns);
|
||||
#else
|
||||
return (B_TRUE);
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(inglobalzone);
|
||||
|
||||
int
|
||||
spl_zone_init(void)
|
||||
{
|
||||
mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
INIT_LIST_HEAD(&zone_datasets);
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
spl_zone_fini(void)
|
||||
{
|
||||
zone_datasets_t *zds;
|
||||
zone_dataset_t *zd;
|
||||
|
||||
/*
|
||||
* It would be better to assert an empty zone_datasets, but since
|
||||
* there's no automatic mechanism for cleaning them up if the user
|
||||
* namespace is destroyed, just do it here, since spl is about to go
|
||||
* out of context.
|
||||
*/
|
||||
while (!list_empty(&zone_datasets)) {
|
||||
zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
|
||||
while (!list_empty(&zds->zds_datasets)) {
|
||||
zd = list_entry(zds->zds_datasets.next,
|
||||
zone_dataset_t, zd_list);
|
||||
list_del(&zd->zd_list);
|
||||
kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
|
||||
put_user_ns(zds->zds_userns);
|
||||
}
|
||||
list_del(&zds->zds_list);
|
||||
kmem_free(zds, sizeof (*zds));
|
||||
}
|
||||
mutex_destroy(&zone_datasets_lock);
|
||||
}
|
@ -61,7 +61,7 @@ priv_policy_ns(const cred_t *cr, int capability, int err,
|
||||
static int
|
||||
priv_policy(const cred_t *cr, int capability, int err)
|
||||
{
|
||||
return (priv_policy_ns(cr, capability, err, NULL));
|
||||
return (priv_policy_ns(cr, capability, err, cr->user_ns));
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -37,6 +37,7 @@
|
||||
* Copyright 2017 RackTop Systems.
|
||||
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
* Copyright (c) 2019 Datto Inc.
|
||||
* Copyright (c) 2021 Klara, Inc.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
@ -150,6 +151,48 @@ out:
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ioc_userns_attach(zfs_cmd_t *zc)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (zc == NULL)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
error = zone_dataset_attach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
|
||||
|
||||
/*
|
||||
* Translate ENOTTY to ZFS_ERR_NOT_USER_NAMESPACE as we just arrived
|
||||
* back from the SPL layer, which does not know about ZFS_ERR_* errors.
|
||||
* See the comment at the user_ns_get() function in spl-zone.c for
|
||||
* details.
|
||||
*/
|
||||
if (error == ENOTTY)
|
||||
error = ZFS_ERR_NOT_USER_NAMESPACE;
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ioc_userns_detach(zfs_cmd_t *zc)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (zc == NULL)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
error = zone_dataset_detach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
|
||||
|
||||
/*
|
||||
* See the comment in zfs_ioc_userns_attach() for details on what is
|
||||
* going on here.
|
||||
*/
|
||||
if (error == ENOTTY)
|
||||
error = ZFS_ERR_NOT_USER_NAMESPACE;
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
zfs_max_nvlist_src_size_os(void)
|
||||
{
|
||||
@ -168,6 +211,10 @@ zfs_ioctl_update_mount_cache(const char *dsname)
|
||||
void
|
||||
zfs_ioctl_init_os(void)
|
||||
{
|
||||
zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_ATTACH,
|
||||
zfs_ioc_userns_attach, zfs_secpolicy_config, POOL_CHECK_NONE);
|
||||
zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_DETACH,
|
||||
zfs_ioc_userns_detach, zfs_secpolicy_config, POOL_CHECK_NONE);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
|
@ -1453,14 +1453,34 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
|
||||
int error = 0;
|
||||
zfsvfs_t *zfsvfs = NULL;
|
||||
vfs_t *vfs = NULL;
|
||||
int canwrite;
|
||||
int dataset_visible_zone;
|
||||
|
||||
ASSERT(zm);
|
||||
ASSERT(osname);
|
||||
|
||||
dataset_visible_zone = zone_dataset_visible(osname, &canwrite);
|
||||
|
||||
/*
|
||||
* Refuse to mount a filesystem if we are in a namespace and the
|
||||
* dataset is not visible or writable in that namespace.
|
||||
*/
|
||||
if (!INGLOBALZONE(curproc) &&
|
||||
(!dataset_visible_zone || !canwrite)) {
|
||||
return (SET_ERROR(EPERM));
|
||||
}
|
||||
|
||||
error = zfsvfs_parse_options(zm->mnt_data, &vfs);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* If a non-writable filesystem is being mounted without the
|
||||
* read-only flag, pretend it was set, as done for snapshots.
|
||||
*/
|
||||
if (!canwrite)
|
||||
vfs->vfs_readonly = true;
|
||||
|
||||
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
|
||||
if (error) {
|
||||
zfsvfs_vfs_free(vfs);
|
||||
|
@ -360,6 +360,7 @@ const struct super_operations zpl_super_operations = {
|
||||
struct file_system_type zpl_fs_type = {
|
||||
.owner = THIS_MODULE,
|
||||
.name = ZFS_DRIVER,
|
||||
.fs_flags = FS_USERNS_MOUNT,
|
||||
.mount = zpl_mount,
|
||||
.kill_sb = zpl_kill_sb,
|
||||
};
|
||||
|
@ -177,7 +177,8 @@ tests = ['upgrade_projectquota_001_pos']
|
||||
tags = ['functional', 'upgrade']
|
||||
|
||||
[tests/functional/user_namespace:Linux]
|
||||
tests = ['user_namespace_001']
|
||||
tests = ['user_namespace_001', 'user_namespace_002', 'user_namespace_003',
|
||||
'user_namespace_004']
|
||||
tags = ['functional', 'user_namespace']
|
||||
|
||||
[tests/functional/userquota:Linux]
|
||||
|
@ -146,11 +146,13 @@ export SYSTEM_FILES_LINUX='attr
|
||||
mkswap
|
||||
modprobe
|
||||
mpstat
|
||||
nsenter
|
||||
parted
|
||||
perf
|
||||
setfattr
|
||||
sha256sum
|
||||
udevadm
|
||||
unshare
|
||||
useradd
|
||||
userdel
|
||||
usermod
|
||||
|
@ -1895,6 +1895,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/user_namespace/cleanup.ksh \
|
||||
functional/user_namespace/setup.ksh \
|
||||
functional/user_namespace/user_namespace_001.ksh \
|
||||
functional/user_namespace/user_namespace_002.ksh \
|
||||
functional/user_namespace/user_namespace_003.ksh \
|
||||
functional/user_namespace/user_namespace_004.ksh \
|
||||
functional/userquota/cleanup.ksh \
|
||||
functional/userquota/groupspace_001_pos.ksh \
|
||||
functional/userquota/groupspace_002_pos.ksh \
|
||||
|
@ -47,6 +47,11 @@ function cleanup
|
||||
done
|
||||
}
|
||||
|
||||
# Skip the test when this system cannot create a user namespace.
if ! unshare -Urm echo test; then
	log_unsupported "Failed to create user namespace"
fi
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_assert "Check root in user namespaces"
|
||||
|
115
tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh
Executable file
115
tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh
Executable file
@ -0,0 +1,115 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Regression test for delegation of datasets to user namespaces.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Delegate a dataset to a user namespace.
|
||||
# 2. Check that 'zfs list' is only able to see inside the delegation.
|
||||
# 3. Check that 'zfs create' is able to create only inside the delegation.
|
||||
# 4. Check that the filesystems can be mounted inside the delegation,
|
||||
# and that file permissions are appropriate.
|
||||
# 5. Check that 'zfs destroy' is able to destroy only inside the delegation.
|
||||
# 6. Check that 'zfs unzone' has a desirable effect.
|
||||
#
|
||||
|
||||
verify_runnable "both"
|
||||
|
||||
# Undo what this test set up: revoke a delegation that is still recorded in
# $proc_ns_added, kill the process holding the user namespace, and destroy
# the datasets created by the test.
user_ns_cleanup() {
	if [ -n "$proc_ns_added" ]; then
		log_must zfs unzone $proc_ns_added $TESTPOOL/userns
	fi
	if [ -n "$unshared_pid" ]; then
		kill -9 $unshared_pid
		# Give it a sec to make the global cleanup more reliable.
		sleep 1
	fi
	log_must zfs destroy -r $TESTPOOL/userns
	# The test also creates $TESTPOOL/user (the "partial match"
	# dataset); remove it too so that it is not left behind.
	if zfs list -H -o name $TESTPOOL/user >/dev/null 2>&1; then
		log_must zfs destroy -r $TESTPOOL/user
	fi
}
|
||||
|
||||
log_onexit user_ns_cleanup

log_assert "Check zfs/zpool command delegation in user namespaces"

# Create the baseline datasets.
log_must zfs create -o zoned=on $TESTPOOL/userns
log_must zfs create -o zoned=on $TESTPOOL/userns/testds
# Partial match should be denied; hence we also set this to be 'zoned'.
log_must zfs create -o zoned=on $TESTPOOL/user

# 1. Create a user namespace with a cloned mount namespace, then delegate.
if ! unshare -Urm echo test; then
	log_unsupported "Failed to create user namespace"
fi
unshare -Urm /usr/bin/sleep 1h &
unshared_pid=$!
proc_ns=/proc/$unshared_pid/ns/user
sleep 2 # Wait for unshare to acquire user namespace
# "$?" right after "unshared_pid=$!" only reports the status of the
# assignment, so check that the background process is actually alive.
if ! kill -0 $unshared_pid 2>/dev/null; then
	log_unsupported "Failed to create user namespace"
fi
log_note "unshare: child=${unshared_pid} proc_ns=${proc_ns}"

NSENTER="nsenter -t $unshared_pid --all"

if ! $NSENTER echo test; then
	log_unsupported "Failed to enter user namespace"
fi

# 1b. Pre-test by checking that 'zone' does something new.
list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')"
log_must test -z "$list"
log_must zfs zone $proc_ns $TESTPOOL/userns
# Remember the namespace so that user_ns_cleanup can revoke the delegation
# if the test fails before step 6.
proc_ns_added="$proc_ns"

# 2. 'zfs list'
list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')"
log_must test "$list" = "$TESTPOOL $TESTPOOL/userns $TESTPOOL/userns/testds "

# 3. 'zfs create'
log_must $NSENTER zfs create $TESTPOOL/userns/created
log_mustnot $NSENTER zfs create $TESTPOOL/user/created

# 4. Check file permissions (create mounts the filesystem).  The 'permissions'
#    check is simply, does it get mapped to user namespace's root/root?
log_must $NSENTER df -h /$TESTPOOL/userns/created
log_must $NSENTER mkfile 8192 /$TESTPOOL/userns/created/testfile
uidgid=$($NSENTER stat -c '%u %g' /$TESTPOOL/userns/created/testfile)
log_must test "${uidgid}" = "0 0"

# 5. 'zfs destroy'
log_must $NSENTER zfs destroy $TESTPOOL/userns/created
log_mustnot $NSENTER zfs destroy $TESTPOOL/user

# 6. 'zfs unzone' should have an effect
log_must zfs unzone $proc_ns $TESTPOOL/userns
proc_ns_added=""
list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')"
log_must test -z "$list"

log_pass "Check zfs/zpool command delegation in user namespaces"
|
97
tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh
Executable file
97
tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh
Executable file
@ -0,0 +1,97 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Regression test for delegation of datasets to user namespaces.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Delegate two datasets with distinctive names to a user namespace.
|
||||
# 2. Check that 'zfs list' is not able to see datasets outside of the
|
||||
# delegation, which have a prefix matching one of the delegated sets.
|
||||
# Also, check that all the delegated sets are visible.
|
||||
#
|
||||
|
||||
verify_runnable "both"
|
||||
|
||||
# Undo what this test set up: revoke the two delegations if they are still
# recorded in $proc_ns_added, kill the process holding the user namespace,
# and destroy the three datasets created by the test.
user_ns_cleanup() {
	if [[ -n "$proc_ns_added" ]]; then
		for _ds in userns otheruserns; do
			log_must zfs unzone $proc_ns_added $TESTPOOL/$_ds
		done
	fi
	if [[ -n "$unshared_pid" ]]; then
		kill -9 $unshared_pid
		# Give it a sec to make the global cleanup more reliable.
		sleep 1
	fi
	for _ds in userns usernsisitnot otheruserns; do
		log_must zfs destroy -r $TESTPOOL/$_ds
	done
}
|
||||
|
||||
log_onexit user_ns_cleanup

log_assert "Check zfs list command handling of dataset visibility in user namespaces"

# Create the baseline dataset.
log_must zfs create -o zoned=on $TESTPOOL/userns
# Datasets with a prefix matching the delegated dataset should not be
# automatically considered visible.
log_must zfs create -o zoned=on $TESTPOOL/usernsisitnot
# All delegated datasets should be visible.
log_must zfs create -o zoned=on $TESTPOOL/otheruserns

# 1. Create a user namespace with a cloned mount namespace, then delegate.
if ! unshare -Urm echo test; then
	log_unsupported "Failed to create user namespace"
fi
unshare -Urm /usr/bin/sleep 1h &
unshared_pid=$!
proc_ns=/proc/$unshared_pid/ns/user
sleep 2 # Wait for unshare to acquire user namespace
# "$?" right after "unshared_pid=$!" only reports the status of the
# assignment, so check that the background process is actually alive.
if ! kill -0 $unshared_pid 2>/dev/null; then
	log_unsupported "Failed to create user namespace"
fi
log_note "unshare: child=${unshared_pid} proc_ns=${proc_ns}"

NSENTER="nsenter -t $unshared_pid --all"

if ! $NSENTER echo test; then
	log_unsupported "Failed to enter user namespace"
fi

# 1b. Pre-test by checking that 'zone' does something new.
list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')"
log_must test -z "$list"
log_must zfs zone $proc_ns $TESTPOOL/userns
log_must zfs zone $proc_ns $TESTPOOL/otheruserns
# Remember the namespace so that user_ns_cleanup can revoke the
# delegations when the test exits.
proc_ns_added="$proc_ns"

# 2. 'zfs list'
list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')"
log_must test "$list" = "$TESTPOOL $TESTPOOL/otheruserns $TESTPOOL/userns "

log_pass "Check zfs list command handling of dataset visibility in user namespaces"
|
67
tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh
Executable file
67
tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh
Executable file
@ -0,0 +1,67 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Regression test for safeguards around the delegation of datasets to
|
||||
# user namespaces.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Check that 'zfs zone' correctly handles the case of the first
|
||||
# argument being a non-namespace file.
|
||||
# 2. Check that 'zfs zone' correctly handles the case of the first
|
||||
# argument being a non-namespace and non-existent file.
|
||||
#
|
||||
|
||||
verify_runnable "both"
|
||||
|
||||
# Remove the temporary file, if one was created, and destroy the dataset
# created by the test.
user_ns_cleanup() {
	if [[ -n "$temp_file" ]]; then
		log_must rm -f "$temp_file"
	fi

	log_must zfs destroy -r "$TESTPOOL/userns"
}
|
||||
|
||||
# Report the test as unsupported if user namespaces are not available.
# This is checked before the cleanup handler is registered: the handler
# destroys $TESTPOOL/userns, which does not exist yet at this point.
if ! unshare -Urm echo test; then
	log_unsupported "Failed to create user namespace"
fi

log_onexit user_ns_cleanup

log_assert "Check zfs zone command handling of non-namespace files"

# Create the baseline datasets.
log_must zfs create -o zoned=on "$TESTPOOL/userns"

# 1. Try to pass a non-namespace file to zfs zone.
temp_file="$(TMPDIR=$TEST_BASE_DIR mktemp)"
log_mustnot zfs zone "$temp_file" "$TESTPOOL/userns"

# 2. Try to pass a non-namespace and non-existent file to zfs zone.
log_mustnot zfs zone "$TEST_BASE_DIR/nonexistent" "$TESTPOOL/userns"

log_pass "Check zfs zone command handling of non-namespace files"
|
Loading…
Reference in New Issue
Block a user