mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Sponsored-by: https://despairlabs.com/sponsor/ Signed-off-by: Rob Norris <robn@despairlabs.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
		
			
				
	
	
		
			423 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			423 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
// SPDX-License-Identifier: BSD-2-Clause
/*
 * Copyright (c) 2021 Klara Systems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
 | 
						|
 | 
						|
#include <sys/types.h>
 | 
						|
#include <sys/sysmacros.h>
 | 
						|
#include <sys/kmem.h>
 | 
						|
#include <linux/file.h>
 | 
						|
#include <linux/magic.h>
 | 
						|
#include <sys/zone.h>
 | 
						|
#include <sys/string.h>
 | 
						|
 | 
						|
#if defined(CONFIG_USER_NS)
 | 
						|
#include <linux/statfs.h>
 | 
						|
#include <linux/proc_ns.h>
 | 
						|
#endif
 | 
						|
 | 
						|
#include <sys/mutex.h>
 | 
						|
 | 
						|
static kmutex_t zone_datasets_lock;
 | 
						|
static struct list_head zone_datasets;
 | 
						|
 | 
						|
typedef struct zone_datasets {
 | 
						|
	struct list_head zds_list;	/* zone_datasets linkage */
 | 
						|
	struct user_namespace *zds_userns; /* namespace reference */
 | 
						|
	struct list_head zds_datasets;	/* datasets for the namespace */
 | 
						|
} zone_datasets_t;
 | 
						|
 | 
						|
typedef struct zone_dataset {
 | 
						|
	struct list_head zd_list;	/* zone_dataset linkage */
 | 
						|
	size_t zd_dsnamelen;		/* length of name */
 | 
						|
	char zd_dsname[];		/* name of the member dataset */
 | 
						|
} zone_dataset_t;
 | 
						|
 | 
						|
#ifdef CONFIG_USER_NS
 | 
						|
/*
 | 
						|
 * Returns:
 | 
						|
 * - 0 on success
 | 
						|
 * - EBADF if it cannot open the provided file descriptor
 | 
						|
 * - ENOTTY if the file itself is a not a user namespace file. We want to
 | 
						|
 *   intercept this error in the ZFS layer. We cannot just return one of the
 | 
						|
 *   ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS
 | 
						|
 *   and the SPL layers.
 | 
						|
 */
 | 
						|
static int
 | 
						|
user_ns_get(int fd, struct user_namespace **userns)
 | 
						|
{
 | 
						|
	struct kstatfs st;
 | 
						|
	struct file *nsfile;
 | 
						|
	struct ns_common *ns;
 | 
						|
	int error;
 | 
						|
 | 
						|
	if ((nsfile = fget(fd)) == NULL)
 | 
						|
		return (EBADF);
 | 
						|
	if (vfs_statfs(&nsfile->f_path, &st) != 0) {
 | 
						|
		error = ENOTTY;
 | 
						|
		goto done;
 | 
						|
	}
 | 
						|
	if (st.f_type != NSFS_MAGIC) {
 | 
						|
		error = ENOTTY;
 | 
						|
		goto done;
 | 
						|
	}
 | 
						|
	ns = get_proc_ns(file_inode(nsfile));
 | 
						|
	if (ns->ops->type != CLONE_NEWUSER) {
 | 
						|
		error = ENOTTY;
 | 
						|
		goto done;
 | 
						|
	}
 | 
						|
	*userns = container_of(ns, struct user_namespace, ns);
 | 
						|
 | 
						|
	error = 0;
 | 
						|
done:
 | 
						|
	fput(nsfile);
 | 
						|
 | 
						|
	return (error);
 | 
						|
}
 | 
						|
#endif /* CONFIG_USER_NS */
 | 
						|
 | 
						|
static unsigned int
 | 
						|
user_ns_zoneid(struct user_namespace *user_ns)
 | 
						|
{
 | 
						|
	unsigned int r;
 | 
						|
 | 
						|
	r = user_ns->ns.inum;
 | 
						|
 | 
						|
	return (r);
 | 
						|
}
 | 
						|
 | 
						|
static struct zone_datasets *
 | 
						|
zone_datasets_lookup(unsigned int nsinum)
 | 
						|
{
 | 
						|
	zone_datasets_t *zds;
 | 
						|
 | 
						|
	list_for_each_entry(zds, &zone_datasets, zds_list) {
 | 
						|
		if (user_ns_zoneid(zds->zds_userns) == nsinum)
 | 
						|
			return (zds);
 | 
						|
	}
 | 
						|
	return (NULL);
 | 
						|
}
 | 
						|
 | 
						|
#ifdef CONFIG_USER_NS
 | 
						|
static struct zone_dataset *
 | 
						|
zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
 | 
						|
{
 | 
						|
	zone_dataset_t *zd;
 | 
						|
 | 
						|
	list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
 | 
						|
		if (zd->zd_dsnamelen != dsnamelen)
 | 
						|
			continue;
 | 
						|
		if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
 | 
						|
			return (zd);
 | 
						|
	}
 | 
						|
 | 
						|
	return (NULL);
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
zone_dataset_cred_check(cred_t *cred)
 | 
						|
{
 | 
						|
 | 
						|
	if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
 | 
						|
		return (EPERM);
 | 
						|
 | 
						|
	return (0);
 | 
						|
}
 | 
						|
#endif /* CONFIG_USER_NS */
 | 
						|
 | 
						|
/*
 * Validate a dataset name and compute the length to use for it.
 *
 * Returns ENOENT, leaving *dsnamelen untouched, if the name is empty or
 * starts with '/'. Otherwise returns 0 and stores the name length in
 * *dsnamelen, not counting a single trailing '/' if one is present.
 */
static int
zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
{
	if (dataset[0] == '\0' || dataset[0] == '/')
		return (ENOENT);

	/* The name is non-empty here, so the length is at least 1. */
	*dsnamelen = strlen(dataset);
	/* Ignore trailing slash, if supplied. */
	if (dataset[*dsnamelen - 1] == '/')
		(*dsnamelen)--;

	return (0);
}
 | 
						|
 | 
						|
int
 | 
						|
zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
 | 
						|
{
 | 
						|
#ifdef CONFIG_USER_NS
 | 
						|
	struct user_namespace *userns;
 | 
						|
	zone_datasets_t *zds;
 | 
						|
	zone_dataset_t *zd;
 | 
						|
	int error;
 | 
						|
	size_t dsnamelen;
 | 
						|
 | 
						|
	if ((error = zone_dataset_cred_check(cred)) != 0)
 | 
						|
		return (error);
 | 
						|
	if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
 | 
						|
		return (error);
 | 
						|
	if ((error = user_ns_get(userns_fd, &userns)) != 0)
 | 
						|
		return (error);
 | 
						|
 | 
						|
	mutex_enter(&zone_datasets_lock);
 | 
						|
	zds = zone_datasets_lookup(user_ns_zoneid(userns));
 | 
						|
	if (zds == NULL) {
 | 
						|
		zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
 | 
						|
		INIT_LIST_HEAD(&zds->zds_list);
 | 
						|
		INIT_LIST_HEAD(&zds->zds_datasets);
 | 
						|
		zds->zds_userns = userns;
 | 
						|
		/*
 | 
						|
		 * Lock the namespace by incresing its refcount to prevent
 | 
						|
		 * the namespace ID from being reused.
 | 
						|
		 */
 | 
						|
		get_user_ns(userns);
 | 
						|
		list_add_tail(&zds->zds_list, &zone_datasets);
 | 
						|
	} else {
 | 
						|
		zd = zone_dataset_lookup(zds, dataset, dsnamelen);
 | 
						|
		if (zd != NULL) {
 | 
						|
			mutex_exit(&zone_datasets_lock);
 | 
						|
			return (EEXIST);
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
 | 
						|
	zd->zd_dsnamelen = dsnamelen;
 | 
						|
	strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
 | 
						|
	INIT_LIST_HEAD(&zd->zd_list);
 | 
						|
	list_add_tail(&zd->zd_list, &zds->zds_datasets);
 | 
						|
 | 
						|
	mutex_exit(&zone_datasets_lock);
 | 
						|
	return (0);
 | 
						|
#else
 | 
						|
	return (ENXIO);
 | 
						|
#endif /* CONFIG_USER_NS */
 | 
						|
}
 | 
						|
EXPORT_SYMBOL(zone_dataset_attach);
 | 
						|
 | 
						|
int
 | 
						|
zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
 | 
						|
{
 | 
						|
#ifdef CONFIG_USER_NS
 | 
						|
	struct user_namespace *userns;
 | 
						|
	zone_datasets_t *zds;
 | 
						|
	zone_dataset_t *zd;
 | 
						|
	int error;
 | 
						|
	size_t dsnamelen;
 | 
						|
 | 
						|
	if ((error = zone_dataset_cred_check(cred)) != 0)
 | 
						|
		return (error);
 | 
						|
	if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
 | 
						|
		return (error);
 | 
						|
	if ((error = user_ns_get(userns_fd, &userns)) != 0)
 | 
						|
		return (error);
 | 
						|
 | 
						|
	mutex_enter(&zone_datasets_lock);
 | 
						|
	zds = zone_datasets_lookup(user_ns_zoneid(userns));
 | 
						|
	if (zds != NULL)
 | 
						|
		zd = zone_dataset_lookup(zds, dataset, dsnamelen);
 | 
						|
	if (zds == NULL || zd == NULL) {
 | 
						|
		mutex_exit(&zone_datasets_lock);
 | 
						|
		return (ENOENT);
 | 
						|
	}
 | 
						|
 | 
						|
	list_del(&zd->zd_list);
 | 
						|
	kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
 | 
						|
 | 
						|
	/* Prune the namespace entry if it has no more delegations. */
 | 
						|
	if (list_empty(&zds->zds_datasets)) {
 | 
						|
		/*
 | 
						|
		 * Decrease the refcount now that the namespace is no longer
 | 
						|
		 * used. It is no longer necessary to prevent the namespace ID
 | 
						|
		 * from being reused.
 | 
						|
		 */
 | 
						|
		put_user_ns(userns);
 | 
						|
		list_del(&zds->zds_list);
 | 
						|
		kmem_free(zds, sizeof (*zds));
 | 
						|
	}
 | 
						|
 | 
						|
	mutex_exit(&zone_datasets_lock);
 | 
						|
	return (0);
 | 
						|
#else
 | 
						|
	return (ENXIO);
 | 
						|
#endif /* CONFIG_USER_NS */
 | 
						|
}
 | 
						|
EXPORT_SYMBOL(zone_dataset_detach);
 | 
						|
 | 
						|
/*
 | 
						|
 * A dataset is visible if:
 | 
						|
 * - It is a parent of a namespace entry.
 | 
						|
 * - It is one of the namespace entries.
 | 
						|
 * - It is a child of a namespace entry.
 | 
						|
 *
 | 
						|
 * A dataset is writable if:
 | 
						|
 * - It is one of the namespace entries.
 | 
						|
 * - It is a child of a namespace entry.
 | 
						|
 *
 | 
						|
 * The parent datasets of namespace entries are visible and
 | 
						|
 * read-only to provide a path back to the root of the pool.
 | 
						|
 */
 | 
						|
int
 | 
						|
zone_dataset_visible(const char *dataset, int *write)
 | 
						|
{
 | 
						|
	zone_datasets_t *zds;
 | 
						|
	zone_dataset_t *zd;
 | 
						|
	size_t dsnamelen, zd_len;
 | 
						|
	int visible;
 | 
						|
 | 
						|
	/* Default to read-only, in case visible is returned. */
 | 
						|
	if (write != NULL)
 | 
						|
		*write = 0;
 | 
						|
	if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
 | 
						|
		return (0);
 | 
						|
	if (INGLOBALZONE(curproc)) {
 | 
						|
		if (write != NULL)
 | 
						|
			*write = 1;
 | 
						|
		return (1);
 | 
						|
	}
 | 
						|
 | 
						|
	mutex_enter(&zone_datasets_lock);
 | 
						|
	zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
 | 
						|
	if (zds == NULL) {
 | 
						|
		mutex_exit(&zone_datasets_lock);
 | 
						|
		return (0);
 | 
						|
	}
 | 
						|
 | 
						|
	visible = 0;
 | 
						|
	list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
 | 
						|
		zd_len = strlen(zd->zd_dsname);
 | 
						|
		if (zd_len > dsnamelen) {
 | 
						|
			/*
 | 
						|
			 * The name of the namespace entry is longer than that
 | 
						|
			 * of the dataset, so it could be that the dataset is a
 | 
						|
			 * parent of the namespace entry.
 | 
						|
			 */
 | 
						|
			visible = memcmp(zd->zd_dsname, dataset,
 | 
						|
			    dsnamelen) == 0 &&
 | 
						|
			    zd->zd_dsname[dsnamelen] == '/';
 | 
						|
			if (visible)
 | 
						|
				break;
 | 
						|
		} else if (zd_len == dsnamelen) {
 | 
						|
			/*
 | 
						|
			 * The name of the namespace entry is as long as that
 | 
						|
			 * of the dataset, so perhaps the dataset itself is the
 | 
						|
			 * namespace entry.
 | 
						|
			 */
 | 
						|
			visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
 | 
						|
			if (visible) {
 | 
						|
				if (write != NULL)
 | 
						|
					*write = 1;
 | 
						|
				break;
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			/*
 | 
						|
			 * The name of the namespace entry is shorter than that
 | 
						|
			 * of the dataset, so perhaps the dataset is a child of
 | 
						|
			 * the namespace entry.
 | 
						|
			 */
 | 
						|
			visible = memcmp(zd->zd_dsname, dataset,
 | 
						|
			    zd_len) == 0 && dataset[zd_len] == '/';
 | 
						|
			if (visible) {
 | 
						|
				if (write != NULL)
 | 
						|
					*write = 1;
 | 
						|
				break;
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	mutex_exit(&zone_datasets_lock);
 | 
						|
	return (visible);
 | 
						|
}
 | 
						|
EXPORT_SYMBOL(zone_dataset_visible);
 | 
						|
 | 
						|
/*
 * Return the zone id of the initial user namespace (init_user_ns), or 0
 * when built without CONFIG_USER_NS.
 */
unsigned int
global_zoneid(void)
{
	unsigned int z = 0;

#if defined(CONFIG_USER_NS)
	z = user_ns_zoneid(&init_user_ns);
#endif

	return (z);
}
EXPORT_SYMBOL(global_zoneid);
 | 
						|
 | 
						|
unsigned int
 | 
						|
crgetzoneid(const cred_t *cr)
 | 
						|
{
 | 
						|
	unsigned int r = 0;
 | 
						|
 | 
						|
#if defined(CONFIG_USER_NS)
 | 
						|
	r = user_ns_zoneid(cr->user_ns);
 | 
						|
#endif
 | 
						|
 | 
						|
	return (r);
 | 
						|
}
 | 
						|
EXPORT_SYMBOL(crgetzoneid);
 | 
						|
 | 
						|
boolean_t
 | 
						|
inglobalzone(proc_t *proc)
 | 
						|
{
 | 
						|
#if defined(CONFIG_USER_NS)
 | 
						|
	return (proc->cred->user_ns == &init_user_ns);
 | 
						|
#else
 | 
						|
	return (B_TRUE);
 | 
						|
#endif
 | 
						|
}
 | 
						|
EXPORT_SYMBOL(inglobalzone);
 | 
						|
 | 
						|
int
 | 
						|
spl_zone_init(void)
 | 
						|
{
 | 
						|
	mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
 | 
						|
	INIT_LIST_HEAD(&zone_datasets);
 | 
						|
	return (0);
 | 
						|
}
 | 
						|
 | 
						|
void
 | 
						|
spl_zone_fini(void)
 | 
						|
{
 | 
						|
	zone_datasets_t *zds;
 | 
						|
	zone_dataset_t *zd;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * It would be better to assert an empty zone_datasets, but since
 | 
						|
	 * there's no automatic mechanism for cleaning them up if the user
 | 
						|
	 * namespace is destroyed, just do it here, since spl is about to go
 | 
						|
	 * out of context.
 | 
						|
	 */
 | 
						|
	while (!list_empty(&zone_datasets)) {
 | 
						|
		zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
 | 
						|
		while (!list_empty(&zds->zds_datasets)) {
 | 
						|
			zd = list_entry(zds->zds_datasets.next,
 | 
						|
			    zone_dataset_t, zd_list);
 | 
						|
			list_del(&zd->zd_list);
 | 
						|
			kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
 | 
						|
		}
 | 
						|
		put_user_ns(zds->zds_userns);
 | 
						|
		list_del(&zds->zds_list);
 | 
						|
		kmem_free(zds, sizeof (*zds));
 | 
						|
	}
 | 
						|
	mutex_destroy(&zone_datasets_lock);
 | 
						|
}
 |