2021-02-21 19:19:43 +03:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2021 Klara Systems, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/sysmacros.h>
|
|
|
|
#include <sys/kmem.h>
|
|
|
|
#include <linux/file.h>
|
|
|
|
#include <linux/magic.h>
|
|
|
|
#include <sys/zone.h>
|
|
|
|
|
|
|
|
#if defined(CONFIG_USER_NS)
|
|
|
|
#include <linux/statfs.h>
|
|
|
|
#include <linux/proc_ns.h>
|
|
|
|
#endif
|
|
|
|
|
2022-10-18 22:29:44 +03:00
|
|
|
#include <sys/mutex.h>
|
|
|
|
|
2021-02-21 19:19:43 +03:00
|
|
|
static kmutex_t zone_datasets_lock;
|
|
|
|
static struct list_head zone_datasets;
|
|
|
|
|
|
|
|
typedef struct zone_datasets {
|
|
|
|
struct list_head zds_list; /* zone_datasets linkage */
|
|
|
|
struct user_namespace *zds_userns; /* namespace reference */
|
|
|
|
struct list_head zds_datasets; /* datasets for the namespace */
|
|
|
|
} zone_datasets_t;
|
|
|
|
|
|
|
|
typedef struct zone_dataset {
|
|
|
|
struct list_head zd_list; /* zone_dataset linkage */
|
|
|
|
size_t zd_dsnamelen; /* length of name */
|
2023-01-10 23:14:59 +03:00
|
|
|
char zd_dsname[]; /* name of the member dataset */
|
2021-02-21 19:19:43 +03:00
|
|
|
} zone_dataset_t;
|
|
|
|
|
|
|
|
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
|
|
|
/*
|
|
|
|
* Returns:
|
|
|
|
* - 0 on success
|
|
|
|
* - EBADF if it cannot open the provided file descriptor
|
|
|
|
* - ENOTTY if the file itself is a not a user namespace file. We want to
|
|
|
|
* intercept this error in the ZFS layer. We cannot just return one of the
|
|
|
|
* ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS
|
|
|
|
* and the SPL layers.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
user_ns_get(int fd, struct user_namespace **userns)
|
|
|
|
{
|
|
|
|
struct kstatfs st;
|
|
|
|
struct file *nsfile;
|
|
|
|
struct ns_common *ns;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if ((nsfile = fget(fd)) == NULL)
|
|
|
|
return (EBADF);
|
|
|
|
if (vfs_statfs(&nsfile->f_path, &st) != 0) {
|
|
|
|
error = ENOTTY;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
if (st.f_type != NSFS_MAGIC) {
|
|
|
|
error = ENOTTY;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
ns = get_proc_ns(file_inode(nsfile));
|
|
|
|
if (ns->ops->type != CLONE_NEWUSER) {
|
|
|
|
error = ENOTTY;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
*userns = container_of(ns, struct user_namespace, ns);
|
|
|
|
|
|
|
|
error = 0;
|
|
|
|
done:
|
|
|
|
fput(nsfile);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
|
|
|
|
|
|
|
static unsigned int
|
|
|
|
user_ns_zoneid(struct user_namespace *user_ns)
|
|
|
|
{
|
|
|
|
unsigned int r;
|
|
|
|
|
|
|
|
#if defined(HAVE_USER_NS_COMMON_INUM)
|
|
|
|
r = user_ns->ns.inum;
|
|
|
|
#else
|
|
|
|
r = user_ns->proc_inum;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return (r);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct zone_datasets *
|
|
|
|
zone_datasets_lookup(unsigned int nsinum)
|
|
|
|
{
|
|
|
|
zone_datasets_t *zds;
|
|
|
|
|
|
|
|
list_for_each_entry(zds, &zone_datasets, zds_list) {
|
|
|
|
if (user_ns_zoneid(zds->zds_userns) == nsinum)
|
|
|
|
return (zds);
|
|
|
|
}
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
|
|
|
static struct zone_dataset *
|
|
|
|
zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
|
|
|
|
{
|
|
|
|
zone_dataset_t *zd;
|
|
|
|
|
|
|
|
list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
|
|
|
|
if (zd->zd_dsnamelen != dsnamelen)
|
|
|
|
continue;
|
|
|
|
if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
|
|
|
|
return (zd);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
zone_dataset_cred_check(cred_t *cred)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
|
|
|
|
return (EPERM);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
|
|
|
|
|
|
|
static int
|
|
|
|
zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (dataset[0] == '\0' || dataset[0] == '/')
|
|
|
|
return (ENOENT);
|
|
|
|
|
|
|
|
*dsnamelen = strlen(dataset);
|
|
|
|
/* Ignore trailing slash, if supplied. */
|
|
|
|
if (dataset[*dsnamelen - 1] == '/')
|
|
|
|
(*dsnamelen)--;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2022-06-13 23:30:34 +03:00
|
|
|
zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
|
2021-02-21 19:19:43 +03:00
|
|
|
{
|
|
|
|
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
|
|
|
struct user_namespace *userns;
|
|
|
|
zone_datasets_t *zds;
|
|
|
|
zone_dataset_t *zd;
|
|
|
|
int error;
|
|
|
|
size_t dsnamelen;
|
|
|
|
|
|
|
|
if ((error = zone_dataset_cred_check(cred)) != 0)
|
|
|
|
return (error);
|
|
|
|
if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
|
|
|
|
return (error);
|
2022-06-13 23:30:34 +03:00
|
|
|
if ((error = user_ns_get(userns_fd, &userns)) != 0)
|
2021-02-21 19:19:43 +03:00
|
|
|
return (error);
|
|
|
|
|
|
|
|
mutex_enter(&zone_datasets_lock);
|
|
|
|
zds = zone_datasets_lookup(user_ns_zoneid(userns));
|
|
|
|
if (zds == NULL) {
|
|
|
|
zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
|
|
|
|
INIT_LIST_HEAD(&zds->zds_list);
|
|
|
|
INIT_LIST_HEAD(&zds->zds_datasets);
|
|
|
|
zds->zds_userns = userns;
|
|
|
|
/*
|
|
|
|
* Lock the namespace by incresing its refcount to prevent
|
|
|
|
* the namespace ID from being reused.
|
|
|
|
*/
|
|
|
|
get_user_ns(userns);
|
|
|
|
list_add_tail(&zds->zds_list, &zone_datasets);
|
|
|
|
} else {
|
|
|
|
zd = zone_dataset_lookup(zds, dataset, dsnamelen);
|
|
|
|
if (zd != NULL) {
|
|
|
|
mutex_exit(&zone_datasets_lock);
|
|
|
|
return (EEXIST);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
|
|
|
|
zd->zd_dsnamelen = dsnamelen;
|
Cleanup: Switch to strlcpy from strncpy
Coverity found a bug in `zfs_secpolicy_create_clone()` where it is
possible for us to pass an unterminated string when `zfs_get_parent()`
returns an error. Upon inspection, it is clear that using `strlcpy()`
would have avoided this issue.
Looking at the codebase, there are a number of other uses of `strncpy()`
that are unsafe and even when it is used safely, switching to
`strlcpy()` would make the code more readable. Therefore, we switch all
instances where we use `strncpy()` to use `strlcpy()`.
Unfortunately, we do not portably have access to `strlcpy()` in
tests/zfs-tests/cmd/zfs_diff-socket.c because it does not link to
libspl. Modifying the appropriate Makefile.am to try to link to it
resulted in an error from the naming choice used in the file. Trying to
disable the check on the file did not work on FreeBSD because Clang
ignores `#undef` when a definition is provided by `-Dstrncpy(...)=...`.
We workaround that by explictly including the C file from libspl into
the test. This makes things build correctly everywhere.
We add a deprecation warning to `config/Rules.am` and suppress it on the
remaining `strncpy()` usage. `strlcpy()` is not portably avaliable in
tests/zfs-tests/cmd/zfs_diff-socket.c, so we use `snprintf()` there as a
substitute.
This patch does not tackle the related problem of `strcpy()`, which is
even less safe. Thankfully, a quick inspection found that it is used far
more correctly than strncpy() was used. A quick inspection did not find
any problems with `strcpy()` usage outside of zhack, but it should be
said that I only checked around 90% of them.
Lastly, some of the fields in kstat_t varied in size by 1 depending on
whether they were in userspace or in the kernel. The origin of this
discrepancy appears to be 04a479f7066ccdaa23a6546955303b172f4a6909 where
it was made for no apparent reason. It conflicts with the comment on
KSTAT_STRLEN, so we shrink the kernel field sizes to match the userspace
field sizes.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13876
2022-09-28 02:35:29 +03:00
|
|
|
strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
|
2021-02-21 19:19:43 +03:00
|
|
|
INIT_LIST_HEAD(&zd->zd_list);
|
|
|
|
list_add_tail(&zd->zd_list, &zds->zds_datasets);
|
|
|
|
|
|
|
|
mutex_exit(&zone_datasets_lock);
|
|
|
|
return (0);
|
|
|
|
#else
|
|
|
|
return (ENXIO);
|
|
|
|
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(zone_dataset_attach);
|
|
|
|
|
|
|
|
int
|
2022-06-13 23:30:34 +03:00
|
|
|
zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
|
2021-02-21 19:19:43 +03:00
|
|
|
{
|
|
|
|
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
|
|
|
struct user_namespace *userns;
|
|
|
|
zone_datasets_t *zds;
|
|
|
|
zone_dataset_t *zd;
|
|
|
|
int error;
|
|
|
|
size_t dsnamelen;
|
|
|
|
|
|
|
|
if ((error = zone_dataset_cred_check(cred)) != 0)
|
|
|
|
return (error);
|
|
|
|
if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
|
|
|
|
return (error);
|
2022-06-13 23:30:34 +03:00
|
|
|
if ((error = user_ns_get(userns_fd, &userns)) != 0)
|
2021-02-21 19:19:43 +03:00
|
|
|
return (error);
|
|
|
|
|
|
|
|
mutex_enter(&zone_datasets_lock);
|
|
|
|
zds = zone_datasets_lookup(user_ns_zoneid(userns));
|
|
|
|
if (zds != NULL)
|
|
|
|
zd = zone_dataset_lookup(zds, dataset, dsnamelen);
|
|
|
|
if (zds == NULL || zd == NULL) {
|
|
|
|
mutex_exit(&zone_datasets_lock);
|
|
|
|
return (ENOENT);
|
|
|
|
}
|
|
|
|
|
|
|
|
list_del(&zd->zd_list);
|
|
|
|
kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
|
|
|
|
|
|
|
|
/* Prune the namespace entry if it has no more delegations. */
|
|
|
|
if (list_empty(&zds->zds_datasets)) {
|
|
|
|
/*
|
|
|
|
* Decrease the refcount now that the namespace is no longer
|
|
|
|
* used. It is no longer necessary to prevent the namespace ID
|
|
|
|
* from being reused.
|
|
|
|
*/
|
|
|
|
put_user_ns(userns);
|
|
|
|
list_del(&zds->zds_list);
|
|
|
|
kmem_free(zds, sizeof (*zds));
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_exit(&zone_datasets_lock);
|
|
|
|
return (0);
|
|
|
|
#else
|
|
|
|
return (ENXIO);
|
|
|
|
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(zone_dataset_detach);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A dataset is visible if:
|
|
|
|
* - It is a parent of a namespace entry.
|
|
|
|
* - It is one of the namespace entries.
|
|
|
|
* - It is a child of a namespace entry.
|
|
|
|
*
|
|
|
|
* A dataset is writable if:
|
|
|
|
* - It is one of the namespace entries.
|
|
|
|
* - It is a child of a namespace entry.
|
|
|
|
*
|
|
|
|
* The parent datasets of namespace entries are visible and
|
|
|
|
* read-only to provide a path back to the root of the pool.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
zone_dataset_visible(const char *dataset, int *write)
|
|
|
|
{
|
|
|
|
zone_datasets_t *zds;
|
|
|
|
zone_dataset_t *zd;
|
|
|
|
size_t dsnamelen, zd_len;
|
|
|
|
int visible;
|
|
|
|
|
|
|
|
/* Default to read-only, in case visible is returned. */
|
|
|
|
if (write != NULL)
|
|
|
|
*write = 0;
|
|
|
|
if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
|
|
|
|
return (0);
|
|
|
|
if (INGLOBALZONE(curproc)) {
|
|
|
|
if (write != NULL)
|
|
|
|
*write = 1;
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_enter(&zone_datasets_lock);
|
|
|
|
zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
|
|
|
|
if (zds == NULL) {
|
|
|
|
mutex_exit(&zone_datasets_lock);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
visible = 0;
|
|
|
|
list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
|
|
|
|
zd_len = strlen(zd->zd_dsname);
|
|
|
|
if (zd_len > dsnamelen) {
|
|
|
|
/*
|
|
|
|
* The name of the namespace entry is longer than that
|
|
|
|
* of the dataset, so it could be that the dataset is a
|
|
|
|
* parent of the namespace entry.
|
|
|
|
*/
|
|
|
|
visible = memcmp(zd->zd_dsname, dataset,
|
|
|
|
dsnamelen) == 0 &&
|
|
|
|
zd->zd_dsname[dsnamelen] == '/';
|
|
|
|
if (visible)
|
|
|
|
break;
|
|
|
|
} else if (zd_len == dsnamelen) {
|
|
|
|
/*
|
|
|
|
* The name of the namespace entry is as long as that
|
|
|
|
* of the dataset, so perhaps the dataset itself is the
|
|
|
|
* namespace entry.
|
|
|
|
*/
|
|
|
|
visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
|
|
|
|
if (visible) {
|
|
|
|
if (write != NULL)
|
|
|
|
*write = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* The name of the namespace entry is shorter than that
|
|
|
|
* of the dataset, so perhaps the dataset is a child of
|
|
|
|
* the namespace entry.
|
|
|
|
*/
|
|
|
|
visible = memcmp(zd->zd_dsname, dataset,
|
|
|
|
zd_len) == 0 && dataset[zd_len] == '/';
|
|
|
|
if (visible) {
|
|
|
|
if (write != NULL)
|
|
|
|
*write = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_exit(&zone_datasets_lock);
|
|
|
|
return (visible);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(zone_dataset_visible);
|
|
|
|
|
|
|
|
unsigned int
|
|
|
|
global_zoneid(void)
|
|
|
|
{
|
|
|
|
unsigned int z = 0;
|
|
|
|
|
|
|
|
#if defined(CONFIG_USER_NS)
|
|
|
|
z = user_ns_zoneid(&init_user_ns);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return (z);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(global_zoneid);
|
|
|
|
|
|
|
|
unsigned int
|
|
|
|
crgetzoneid(const cred_t *cr)
|
|
|
|
{
|
|
|
|
unsigned int r = 0;
|
|
|
|
|
|
|
|
#if defined(CONFIG_USER_NS)
|
|
|
|
r = user_ns_zoneid(cr->user_ns);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return (r);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(crgetzoneid);
|
|
|
|
|
|
|
|
boolean_t
|
|
|
|
inglobalzone(proc_t *proc)
|
|
|
|
{
|
|
|
|
#if defined(CONFIG_USER_NS)
|
|
|
|
return (proc->cred->user_ns == &init_user_ns);
|
|
|
|
#else
|
|
|
|
return (B_TRUE);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(inglobalzone);
|
|
|
|
|
|
|
|
int
|
|
|
|
spl_zone_init(void)
|
|
|
|
{
|
|
|
|
mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
|
|
|
|
INIT_LIST_HEAD(&zone_datasets);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
spl_zone_fini(void)
|
|
|
|
{
|
|
|
|
zone_datasets_t *zds;
|
|
|
|
zone_dataset_t *zd;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* It would be better to assert an empty zone_datasets, but since
|
|
|
|
* there's no automatic mechanism for cleaning them up if the user
|
|
|
|
* namespace is destroyed, just do it here, since spl is about to go
|
|
|
|
* out of context.
|
|
|
|
*/
|
|
|
|
while (!list_empty(&zone_datasets)) {
|
|
|
|
zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
|
|
|
|
while (!list_empty(&zds->zds_datasets)) {
|
|
|
|
zd = list_entry(zds->zds_datasets.next,
|
|
|
|
zone_dataset_t, zd_list);
|
|
|
|
list_del(&zd->zd_list);
|
|
|
|
kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
|
|
|
|
}
|
2022-09-21 01:25:21 +03:00
|
|
|
put_user_ns(zds->zds_userns);
|
2021-02-21 19:19:43 +03:00
|
|
|
list_del(&zds->zds_list);
|
|
|
|
kmem_free(zds, sizeof (*zds));
|
|
|
|
}
|
|
|
|
mutex_destroy(&zone_datasets_lock);
|
|
|
|
}
|