Add FreeBSD support to OpenZFS

Add the FreeBSD platform code to the OpenZFS repository.  As of this
commit the source can be compiled and tested on FreeBSD 11 and 12.
Subsequent commits are now required to compile on FreeBSD and Linux.
Additionally, they must pass the ZFS Test Suite on FreeBSD which is
being run by the CI.  As of this commit 1230 tests pass on FreeBSD
and there are no unexpected failures.

Reviewed-by: Sean Eric Fagan <sef@ixsystems.com>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Reviewed-by: Richard Laager <rlaager@wiktel.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Co-authored-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Matt Macy <mmacy@FreeBSD.org>
Signed-off-by: Ryan Moeller <ryan@iXsystems.com>
Closes #898 
Closes #8987
This commit is contained in:
Matthew Macy
2020-04-14 11:36:28 -07:00
committed by GitHub
parent 75c62019f3
commit 9f0a21e641
209 changed files with 46197 additions and 77 deletions
+10
View File
@@ -2,6 +2,8 @@
*.ko.unsigned
*.ko.out
*.ko.out.sig
*.ko.debug
*.ko.full
*.dwo
.*.cmd
.*.d
@@ -11,5 +13,13 @@
/.tmp_versions
/Module.markers
/Module.symvers
/vnode_if*
/bus_if.h
/device_if.h
/opt_global.h
/export_syms
/machine
/x86
!Makefile.in
+381
View File
@@ -0,0 +1,381 @@
.if !defined(WITH_CTF)
WITH_CTF=1
.endif
.include <bsd.sys.mk>
SRCDIR= ${.CURDIR}
INCDIR=${.CURDIR:H}/include
KMOD= openzfs
.PATH: ${SRCDIR}/avl \
${SRCDIR}/lua \
${SRCDIR}/nvpair \
${SRCDIR}/os/freebsd/spl \
${SRCDIR}/os/freebsd/zfs \
${SRCDIR}/unicode \
${SRCDIR}/zcommon \
${SRCDIR}/zfs
CFLAGS+= -I${INCDIR}
CFLAGS+= -I${INCDIR}/spl
CFLAGS+= -I${INCDIR}/os/freebsd
CFLAGS+= -I${INCDIR}/os/freebsd/spl
CFLAGS+= -I${INCDIR}/os/freebsd/zfs
CFLAGS+= -include ${INCDIR}/os/freebsd/spl/sys/ccompile.h
CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS -D__BSD_VISIBLE=1
CFLAGS+= -DHAVE_UIO_ZEROCOPY -DWITHOUT_NETDUMP -D__KERNEL -D_SYS_CONDVAR_H_
CFLAGS+= -D_SYS_VMEM_H_ -D_MACHINE_ENDIAN_H_ -DKDTRACE_HOOKS -DSMP
.if ${MACHINE_ARCH} == "amd64"
CFLAGS+= -DHAVE_AVX2 -DHAVE_AVX -D__x86_64 -DHAVE_SSE2 -DHAVE_AVX512F
.endif
.if defined(WITH_DEBUG) && ${WITH_DEBUG} == "true"
CFLAGS+= -DINVARIANTS -DWITNESS -g -O0 -DZFS_DEBUG -DOPENSOLARIS_WITNESS
.else
CFLAGS += -DNDEBUG
.endif
.if defined(WITH_VFS_DEBUG) && ${WITH_VFS_DEBUG} == "true"
# kernel must also be built with this option for this to work
CFLAGS+= -DDEBUG_VFS_LOCKS
.endif
.if defined(WITH_GCOV) && ${WITH_GCOV} == "true"
CFLAGS+= -fprofile-arcs -ftest-coverage
.endif
DEBUG_FLAGS=-g
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \
${MACHINE_ARCH} == "arm"
CFLAGS+= -DBITS_PER_LONG=32
.else
CFLAGS+= -DBITS_PER_LONG=64
.endif
SRCS= vnode_if.h device_if.h bus_if.h
# avl
SRCS+= avl.c
#lua
SRCS+= lapi.c \
lauxlib.c \
lbaselib.c \
lcode.c \
lcompat.c \
lcorolib.c \
lctype.c \
ldebug.c \
ldo.c \
lfunc.c \
lgc.c \
llex.c \
lmem.c \
lobject.c \
lopcodes.c \
lparser.c \
lstate.c \
lstring.c \
lstrlib.c \
ltable.c \
ltablib.c \
ltm.c \
lvm.c \
lzio.c
#nvpair
SRCS+= nvpair.c \
fnvpair.c \
nvpair_alloc_spl.c \
nvpair_alloc_fixed.c
#os/freebsd/spl
SRCS+= acl_common.c \
btree.c \
callb.c \
list.c \
spl_acl.c \
spl_cmn_err.c \
spl_dtrace.c \
spl_kmem.c \
spl_kstat.c \
spl_misc.c \
spl_policy.c \
spl_string.c \
spl_sunddi.c \
spl_sysevent.c \
spl_taskq.c \
spl_uio.c \
spl_vfs.c \
spl_vm.c \
spl_zone.c \
sha256c.c \
sha512c.c \
spl_procfs_list.c \
spl_zlib.c
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \
${MACHINE_ARCH} == "arm"
SRCS+= spl_atomic.c
.endif
#os/freebsd/zfs
SRCS+= abd.c \
crypto_os.c \
dmu_os.c \
hkdf.c \
kmod_core.c \
spa_os.c \
sysctl_os.c \
vdev_file.c \
vdev_label_os.c \
vdev_geom.c \
zfs_acl.c \
zfs_ctldir.c \
zfs_dir.c \
zfs_ioctl_os.c \
zfs_log.c \
zfs_replay.c \
zfs_vfsops.c \
zfs_vnops.c \
zfs_znode.c \
zio_crypt.c \
zvol_os.c
#unicode
SRCS+= uconv.c \
u8_textprep.c
#zcommon
SRCS+= zfeature_common.c \
zfs_comutil.c \
zfs_deleg.c \
zfs_fletcher.c \
zfs_fletcher_avx512.c \
zfs_fletcher_intel.c \
zfs_fletcher_sse.c \
zfs_fletcher_superscalar.c \
zfs_fletcher_superscalar4.c \
zfs_namecheck.c \
zfs_prop.c \
zpool_prop.c \
zprop_common.c
#zfs
SRCS+= aggsum.c \
arc.c \
arc_os.c \
blkptr.c \
bplist.c \
bpobj.c \
cityhash.c \
dbuf.c \
dbuf_stats.c \
bptree.c \
bqueue.c \
dataset_kstats.c \
ddt.c \
ddt_zap.c \
dmu.c \
dmu_diff.c \
dmu_object.c \
dmu_objset.c \
dmu_recv.c \
dmu_redact.c \
dmu_send.c \
dmu_traverse.c \
dmu_tx.c \
dmu_zfetch.c \
dnode.c \
dnode_sync.c \
dsl_dataset.c \
dsl_deadlist.c \
dsl_deleg.c \
dsl_bookmark.c \
dsl_dir.c \
dsl_crypt.c \
dsl_destroy.c \
dsl_pool.c \
dsl_prop.c \
dsl_scan.c \
dsl_synctask.c \
dsl_userhold.c \
fm.c \
gzip.c \
lzjb.c \
lz4.c \
metaslab.c \
mmp.c \
multilist.c \
objlist.c \
pathname.c \
range_tree.c \
refcount.c \
rrwlock.c \
sa.c \
sha256.c \
skein_zfs.c \
spa.c \
spa_boot.c \
spa_checkpoint.c \
spa_config.c \
spa_errlog.c \
spa_history.c \
spa_log_spacemap.c \
spa_misc.c \
spa_stats.c \
space_map.c \
space_reftree.c \
txg.c \
uberblock.c \
unique.c \
vdev.c \
vdev_cache.c \
vdev_indirect.c \
vdev_indirect_births.c \
vdev_indirect_mapping.c \
vdev_initialize.c \
vdev_label.c \
vdev_mirror.c \
vdev_missing.c \
vdev_queue.c \
vdev_raidz.c \
vdev_raidz_math.c \
vdev_raidz_math_scalar.c \
vdev_raidz_math_avx2.c \
vdev_raidz_math_avx512bw.c \
vdev_raidz_math_avx512f.c \
vdev_raidz_math_sse2.c \
vdev_raidz_math_ssse3.c \
vdev_removal.c \
vdev_root.c \
vdev_trim.c \
zap.c \
zap_leaf.c \
zap_micro.c \
zcp.c \
zcp_get.c \
zcp_global.c \
zcp_iter.c \
zcp_set.c \
zcp_synctask.c \
zfeature.c \
zfs_byteswap.c \
zfs_debug.c \
zfs_file_os.c \
zfs_fm.c \
zfs_fuid.c \
zfs_fuid_os.c \
zfs_ioctl.c \
zfs_onexit.c \
zfs_quota.c \
zfs_ratelimit.c \
zfs_rlock.c \
zfs_sa.c \
zil.c \
zio.c \
zio_checksum.c \
zio_compress.c \
zio_inject.c \
zle.c \
zrlock.c \
zthr.c \
zvol.c
beforeinstall:
.if ${MK_DEBUG_FILES} != "no"
mtree -eu \
-f /etc/mtree/BSD.debug.dist \
-p ${DESTDIR}/usr/lib
.endif
.include <bsd.kmod.mk>
CFLAGS.gcc+= -Wno-pointer-to-int-cast
CFLAGS.lapi.c= -Wno-cast-qual
CFLAGS.lcompat.c= -Wno-cast-qual -Wno-missing-prototypes
CFLAGS.lobject.c= -Wno-cast-qual
CFLAGS.ltable.c= -Wno-cast-qual
CFLAGS.lvm.c= -Wno-cast-qual
CFLAGS.nvpair.c= -Wno-cast-qual
CFLAGS.acl_common.c= -Wno-strict-prototypes -Wno-missing-prototypes
CFLAGS.callb.c= -Wno-strict-prototypes -Wno-missing-prototypes
CFLAGS.spl_kstat.c= -Wno-missing-prototypes
CFLAGS.spl_string.c= -Wno-cast-qual
CFLAGS.spl_vm.c= -Wno-cast-qual -Wno-missing-prototypes
CFLAGS.spl_zlib.c= -Wno-cast-qual
CFLAGS.abd.c= -Wno-cast-qual
CFLAGS.freebsd_dmu.c= -Wno-missing-prototypes
CFLAGS.freebsd_kmod.c= -Wno-missing-prototypes
CFLAGS.vdev_geom.c= -Wno-missing-prototypes
CFLAGS.zfs_acl.c= -Wno-missing-prototypes
CFLAGS.zfs_ctldir.c= -Wno-missing-prototypes -Wno-strict-prototypes
CFLAGS.zfs_log.c= -Wno-cast-qual
CFLAGS.zfs_vfsops.c= -Wno-missing-prototypes
CFLAGS.zfs_vnops.c= -Wno-missing-prototypes -Wno-strict-prototypes -Wno-pointer-arith
CFLAGS.zfs_znode.c= -Wno-missing-prototypes
CFLAGS.zvol.c= -Wno-missing-prototypes
CFLAGS.u8_textprep.c= -Wno-cast-qual
CFLAGS.zfs_fletcher.c= -Wno-cast-qual -Wno-pointer-arith
CFLAGS.zfs_fletcher_intel.c= -Wno-cast-qual -Wno-pointer-arith
CFLAGS.zfs_fletcher_sse.c= -Wno-cast-qual -Wno-pointer-arith
CFLAGS.zfs_fletcher_avx512.c= -Wno-cast-qual -Wno-pointer-arith
CFLAGS.zprop_common.c= -Wno-cast-qual
CFLAGS.arc.c= -Wno-missing-prototypes
CFLAGS.blkptr.c= -Wno-missing-prototypes
CFLAGS.dbuf.c= -Wno-missing-prototypes
CFLAGS.dbuf_stats.c= -Wno-missing-prototypes
CFLAGS.ddt.c= -Wno-missing-prototypes -Wno-cast-qual
CFLAGS.dmu.c= -Wno-missing-prototypes -Wno-cast-qual
CFLAGS.dmu_object.c= -Wno-missing-prototypes
CFLAGS.dmu_objset.c= -Wno-missing-prototypes
CFLAGS.dmu_traverse.c= -Wno-cast-qual
CFLAGS.dsl_dir.c= -Wno-missing-prototypes -Wno-cast-qual
CFLAGS.dsl_crypt.c= -Wno-missing-prototypes
CFLAGS.dsl_deadlist.c= -Wno-cast-qual
CFLAGS.dsl_pool.c= -Wno-missing-prototypes
CFLAGS.dsl_prop.c= -Wno-cast-qual
CFLAGS.dsl_scan.c= -Wno-missing-prototypes
CFLAGS.fm.c= -Wno-cast-qual
CFLAGS.gzip.c= -Wno-missing-prototypes
CFLAGS.lzjb.c= -Wno-missing-prototypes
CFLAGS.lz4.c= -Wno-missing-prototypes -Wno-cast-qual
CFLAGS.metaslab.c= -Wno-missing-prototypes
CFLAGS.sa.c= -Wno-missing-prototypes
CFLAGS.sha256.c= -Wno-missing-prototypes
CFLAGS.skein_zfs.c= -Wno-missing-prototypes
CFLAGS.spa.c= -Wno-missing-prototypes -Wno-cast-qual
CFLAGS.spa_boot.c= -Wno-missing-prototypes
CFLAGS.spa_misc.c= -Wno-missing-prototypes -Wno-cast-qual
CFLAGS.space_map.c= -Wno-missing-prototypes
CFLAGS.vdev.c= -Wno-missing-prototypes
CFLAGS.vdev_indirect.c= -Wno-missing-prototypes
CFLAGS.vdev_label.c= -Wno-missing-prototypes
CFLAGS.vdev_queue.c= -Wno-missing-prototypes
CFLAGS.vdev_raidz.c= -Wno-cast-qual
CFLAGS.vdev_raidz_math.c= -Wno-cast-qual
CFLAGS.vdev_raidz_math_scalar.c= -Wno-cast-qual -Wno-missing-prototypes
CFLAGS.vdev_raidz_math_avx2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
CFLAGS.vdev_raidz_math_avx512f.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
CFLAGS.vdev_raidz_math_sse2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
CFLAGS.zap_leaf.c= -Wno-cast-qual
CFLAGS.zap_micro.c= -Wno-missing-prototypes -Wno-cast-qual
CFLAGS.zcp.c= -Wno-cast-qual
CFLAGS.zcp_get.c= -Wno-missing-prototypes
CFLAGS.zfs_debug.c= -Wno-missing-prototypes
CFLAGS.zfs_fm.c= -Wno-cast-qual
CFLAGS.zfs_ioctl.c= -Wno-missing-prototypes -Wno-cast-qual
CFLAGS.zil.c= -Wno-missing-prototypes -Wno-cast-qual
CFLAGS.zio.c= -Wno-missing-prototypes -Wno-cast-qual
CFLAGS.zio_checksum.c= -Wno-missing-prototypes
CFLAGS.zle.c= -Wno-missing-prototypes
CFLAGS.zrlock.c= -Wno-missing-prototypes -Wno-cast-qual
+23 -4
View File
@@ -12,7 +12,7 @@ obj-m += os/linux/zfs/
INSTALL_MOD_DIR ?= extra
ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement
ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@
ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @NO_FORMAT_ZERO_LENGTH@
ZFS_MODULE_CFLAGS += -include @abs_top_builddir@/zfs_config.h
ZFS_MODULE_CFLAGS += -I@abs_top_srcdir@/include/os/linux/kernel
ZFS_MODULE_CFLAGS += -I@abs_top_srcdir@/include/os/linux/spl
@@ -40,6 +40,11 @@ modules-Linux:
done
$(MAKE) -C @LINUX_OBJ@ M=`pwd` @KERNEL_MAKE@ CONFIG_ZFS=m modules
# Only pass down gmake -j flag, if used.
modules-FreeBSD:
flags="$$(echo $$MAKEFLAGS | awk -v RS=' ' /^-j/)"; \
env MAKEFLAGS="" make $${flags} -f Makefile.bsd
modules-unknown:
@true
@@ -55,6 +60,10 @@ clean-Linux:
find . -name '*.ur-safe' -type f -print | xargs $(RM)
clean-FreeBSD:
flags="$$(echo $$MAKEFLAGS | awk -v RS=' ' /^-j/)"; \
env MAKEFLAGS="" make $${flags} -f Makefile.bsd clean
clean: clean-@ac_system@
modules_install-Linux:
@@ -100,6 +109,11 @@ cscopelist-am: $(am__tagged_files)
fi; \
done >> $(top_builddir)/cscope.files
modules_install-FreeBSD:
@# Install the kernel modules
flags="$$(echo $$MAKEFLAGS | awk -v RS=' ' /^-j/)"; \
env MAKEFLAGS="" make $${flags} -f Makefile.bsd install
modules_install: modules_install-@ac_system@
modules_uninstall-Linux:
@@ -109,11 +123,16 @@ modules_uninstall-Linux:
$(RM) -R $$kmoddir/$(INSTALL_MOD_DIR)/$$objdir; \
done
modules_uninstall-FreeBSD:
@false
modules_uninstall: modules_uninstall-@ac_system@
distdir:
list='$(obj-m)'; for objdir in $$list; do \
(cd @top_srcdir@/module && find $$objdir \
-name '*.c' -o -name '*.h' -o -name '*.S' | \
xargs -r cp --parents -t @abs_top_builddir@/module/$$distdir); \
(cd @top_srcdir@/module && find $$objdir -name '*.[chS]' | \
while read path; do \
mkdir -p @abs_top_builddir@/module/$$distdir/$${path%/*}; \
cp $$path @abs_top_builddir@/module/$$distdir/$$path; \
done); \
done
File diff suppressed because it is too large Load Diff
+372
View File
@@ -0,0 +1,372 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/param.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/callb.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kobj.h>
#include <sys/systm.h> /* for delay() */
#include <sys/taskq.h> /* For TASKQ_NAMELEN */
#include <sys/kernel.h>
#define CB_MAXNAME TASKQ_NAMELEN
/*
* The callb mechanism provides generic event scheduling/echoing.
* A callb function is registered and called on behalf of the event.
*/
typedef struct callb {
struct callb *c_next; /* next in class or on freelist */
kthread_id_t c_thread; /* ptr to caller's thread struct */
char c_flag; /* info about the callb state */
uchar_t c_class; /* this callb's class */
kcondvar_t c_done_cv; /* signal callb completion */
boolean_t (*c_func)(); /* cb function: returns true if ok */
void *c_arg; /* arg to c_func */
char c_name[CB_MAXNAME+1]; /* debug:max func name length */
} callb_t;
/*
* callb c_flag bitmap definitions
*/
#define CALLB_FREE 0x0
#define CALLB_TAKEN 0x1
#define CALLB_EXECUTING 0x2
/*
* Basic structure for a callb table.
* All callbs are organized into different class groups described
* by ct_class array.
* The callbs within a class are single-linked and normally run by a
* serial execution.
*/
typedef struct callb_table {
kmutex_t ct_lock; /* protect all callb states */
callb_t *ct_freelist; /* free callb structures */
int ct_busy; /* != 0 prevents additions */
kcondvar_t ct_busy_cv; /* to wait for not busy */
int ct_ncallb; /* num of callbs allocated */
callb_t *ct_first_cb[NCBCLASS]; /* ptr to 1st callb in a class */
} callb_table_t;
int callb_timeout_sec = CPR_KTHREAD_TIMEOUT_SEC;
static callb_id_t callb_add_common(boolean_t (*)(void *, int),
void *, int, char *, kthread_id_t);
static callb_table_t callb_table; /* system level callback table */
static callb_table_t *ct = &callb_table;
static kmutex_t callb_safe_mutex;
callb_cpr_t callb_cprinfo_safe = {
&callb_safe_mutex, CALLB_CPR_ALWAYS_SAFE, 0, {0, 0} };
/*
* Init all callb tables in the system.
*/
void
callb_init(void *dummy __unused)
{
callb_table.ct_busy = 0; /* mark table open for additions */
mutex_init(&callb_safe_mutex, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&callb_table.ct_lock, NULL, MUTEX_DEFAULT, NULL);
}
void
callb_fini(void *dummy __unused)
{
callb_t *cp;
int i;
mutex_enter(&ct->ct_lock);
for (i = 0; i < 16; i++) {
while ((cp = ct->ct_freelist) != NULL) {
ct->ct_freelist = cp->c_next;
ct->ct_ncallb--;
kmem_free(cp, sizeof (callb_t));
}
if (ct->ct_ncallb == 0)
break;
/* Not all callbacks finished, waiting for the rest. */
mutex_exit(&ct->ct_lock);
tsleep(ct, 0, "callb", hz / 4);
mutex_enter(&ct->ct_lock);
}
if (ct->ct_ncallb > 0)
printf("%s: Leaked %d callbacks!\n", __func__, ct->ct_ncallb);
mutex_exit(&ct->ct_lock);
mutex_destroy(&callb_safe_mutex);
mutex_destroy(&callb_table.ct_lock);
}
/*
* callout_add() is called to register func() be called later.
*/
static callb_id_t
callb_add_common(boolean_t (*func)(void *arg, int code),
void *arg, int class, char *name, kthread_id_t t)
{
callb_t *cp;
ASSERT(class < NCBCLASS);
mutex_enter(&ct->ct_lock);
while (ct->ct_busy)
cv_wait(&ct->ct_busy_cv, &ct->ct_lock);
if ((cp = ct->ct_freelist) == NULL) {
ct->ct_ncallb++;
cp = (callb_t *)kmem_zalloc(sizeof (callb_t), KM_SLEEP);
}
ct->ct_freelist = cp->c_next;
cp->c_thread = t;
cp->c_func = func;
cp->c_arg = arg;
cp->c_class = (uchar_t)class;
cp->c_flag |= CALLB_TAKEN;
#ifdef DEBUG
if (strlen(name) > CB_MAXNAME)
cmn_err(CE_WARN, "callb_add: name of callback function '%s' "
"too long -- truncated to %d chars",
name, CB_MAXNAME);
#endif
(void) strncpy(cp->c_name, name, CB_MAXNAME);
cp->c_name[CB_MAXNAME] = '\0';
/*
* Insert the new callb at the head of its class list.
*/
cp->c_next = ct->ct_first_cb[class];
ct->ct_first_cb[class] = cp;
mutex_exit(&ct->ct_lock);
return ((callb_id_t)cp);
}
/*
* The default function to add an entry to the callback table. Since
* it uses curthread as the thread identifier to store in the table,
* it should be used for the normal case of a thread which is calling
* to add ITSELF to the table.
*/
callb_id_t
callb_add(boolean_t (*func)(void *arg, int code),
void *arg, int class, char *name)
{
return (callb_add_common(func, arg, class, name, curthread));
}
/*
* A special version of callb_add() above for use by threads which
* might be adding an entry to the table on behalf of some other
* thread (for example, one which is constructed but not yet running).
* In this version the thread id is an argument.
*/
callb_id_t
callb_add_thread(boolean_t (*func)(void *arg, int code),
void *arg, int class, char *name, kthread_id_t t)
{
return (callb_add_common(func, arg, class, name, t));
}
/*
* callout_delete() is called to remove an entry identified by id
* that was originally placed there by a call to callout_add().
* return -1 if fail to delete a callb entry otherwise return 0.
*/
int
callb_delete(callb_id_t id)
{
callb_t **pp;
callb_t *me = (callb_t *)id;
mutex_enter(&ct->ct_lock);
for (;;) {
pp = &ct->ct_first_cb[me->c_class];
while (*pp != NULL && *pp != me)
pp = &(*pp)->c_next;
#ifdef DEBUG
if (*pp != me) {
cmn_err(CE_WARN, "callb delete bogus entry 0x%p",
(void *)me);
mutex_exit(&ct->ct_lock);
return (-1);
}
#endif /* DEBUG */
/*
* It is not allowed to delete a callb in the middle of
* executing otherwise, the callb_execute() will be confused.
*/
if (!(me->c_flag & CALLB_EXECUTING))
break;
cv_wait(&me->c_done_cv, &ct->ct_lock);
}
/* relink the class list */
*pp = me->c_next;
/* clean up myself and return the free callb to the head of freelist */
me->c_flag = CALLB_FREE;
me->c_next = ct->ct_freelist;
ct->ct_freelist = me;
mutex_exit(&ct->ct_lock);
return (0);
}
/*
* class: indicates to execute all callbs in the same class;
* code: optional argument for the callb functions.
* return: = 0: success
* != 0: ptr to string supplied when callback was registered
*/
void *
callb_execute_class(int class, int code)
{
callb_t *cp;
void *ret = NULL;
ASSERT(class < NCBCLASS);
mutex_enter(&ct->ct_lock);
for (cp = ct->ct_first_cb[class];
cp != NULL && ret == 0; cp = cp->c_next) {
while (cp->c_flag & CALLB_EXECUTING)
cv_wait(&cp->c_done_cv, &ct->ct_lock);
/*
* cont if the callb is deleted while we're sleeping
*/
if (cp->c_flag == CALLB_FREE)
continue;
cp->c_flag |= CALLB_EXECUTING;
#ifdef CALLB_DEBUG
printf("callb_execute: name=%s func=%p arg=%p\n",
cp->c_name, (void *)cp->c_func, (void *)cp->c_arg);
#endif /* CALLB_DEBUG */
mutex_exit(&ct->ct_lock);
/* If callback function fails, pass back client's name */
if (!(*cp->c_func)(cp->c_arg, code))
ret = cp->c_name;
mutex_enter(&ct->ct_lock);
cp->c_flag &= ~CALLB_EXECUTING;
cv_broadcast(&cp->c_done_cv);
}
mutex_exit(&ct->ct_lock);
return (ret);
}
/*
* callers make sure no recursive entries to this func.
* dp->cc_lockp is registered by callb_add to protect callb_cpr_t structure.
*
* When calling to stop a kernel thread (code == CB_CODE_CPR_CHKPT) we
* use a cv_timedwait() in case the kernel thread is blocked.
*
* Note that this is a generic callback handler for daemon CPR and
* should NOT be changed to accommodate any specific requirement in a daemon.
* Individual daemons that require changes to the handler shall write
* callback routines in their own daemon modules.
*/
boolean_t
callb_generic_cpr(void *arg, int code)
{
callb_cpr_t *cp = (callb_cpr_t *)arg;
clock_t ret = 0; /* assume success */
mutex_enter(cp->cc_lockp);
switch (code) {
case CB_CODE_CPR_CHKPT:
cp->cc_events |= CALLB_CPR_START;
#ifdef CPR_NOT_THREAD_SAFE
while (!(cp->cc_events & CALLB_CPR_SAFE))
/* cv_timedwait() returns -1 if it times out. */
if ((ret = cv_reltimedwait(&cp->cc_callb_cv,
cp->cc_lockp, (callb_timeout_sec * hz),
TR_CLOCK_TICK)) == -1)
break;
#endif
break;
case CB_CODE_CPR_RESUME:
cp->cc_events &= ~CALLB_CPR_START;
cv_signal(&cp->cc_stop_cv);
break;
}
mutex_exit(cp->cc_lockp);
return (ret != -1);
}
/*
* The generic callback function associated with kernel threads which
* are always considered safe.
*/
/* ARGSUSED */
boolean_t
callb_generic_cpr_safe(void *arg, int code)
{
return (B_TRUE);
}
/*
* Prevent additions to callback table.
*/
void
callb_lock_table(void)
{
mutex_enter(&ct->ct_lock);
ASSERT(ct->ct_busy == 0);
ct->ct_busy = 1;
mutex_exit(&ct->ct_lock);
}
/*
* Allow additions to callback table.
*/
void
callb_unlock_table(void)
{
mutex_enter(&ct->ct_lock);
ASSERT(ct->ct_busy != 0);
ct->ct_busy = 0;
cv_broadcast(&ct->ct_busy_cv);
mutex_exit(&ct->ct_lock);
}
SYSINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_init, NULL);
SYSUNINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_fini, NULL);
+245
View File
@@ -0,0 +1,245 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Generic doubly-linked list implementation
*/
#include <sys/list.h>
#include <sys/list_impl.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset))
#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset))
#define list_empty(a) ((a)->list_head.list_next == &(a)->list_head)
#define list_insert_after_node(list, node, object) { \
list_node_t *lnew = list_d2l(list, object); \
lnew->list_prev = (node); \
lnew->list_next = (node)->list_next; \
(node)->list_next->list_prev = lnew; \
(node)->list_next = lnew; \
}
#define list_insert_before_node(list, node, object) { \
list_node_t *lnew = list_d2l(list, object); \
lnew->list_next = (node); \
lnew->list_prev = (node)->list_prev; \
(node)->list_prev->list_next = lnew; \
(node)->list_prev = lnew; \
}
#define list_remove_node(node) \
(node)->list_prev->list_next = (node)->list_next; \
(node)->list_next->list_prev = (node)->list_prev; \
(node)->list_next = (node)->list_prev = NULL
void
list_create(list_t *list, size_t size, size_t offset)
{
ASSERT(list);
ASSERT(size > 0);
ASSERT(size >= offset + sizeof (list_node_t));
list->list_size = size;
list->list_offset = offset;
list->list_head.list_next = list->list_head.list_prev =
&list->list_head;
}
void
list_destroy(list_t *list)
{
list_node_t *node = &list->list_head;
ASSERT(list);
ASSERT(list->list_head.list_next == node);
ASSERT(list->list_head.list_prev == node);
node->list_next = node->list_prev = NULL;
}
void
list_insert_after(list_t *list, void *object, void *nobject)
{
if (object == NULL) {
list_insert_head(list, nobject);
} else {
list_node_t *lold = list_d2l(list, object);
list_insert_after_node(list, lold, nobject);
}
}
void
list_insert_before(list_t *list, void *object, void *nobject)
{
if (object == NULL) {
list_insert_tail(list, nobject);
} else {
list_node_t *lold = list_d2l(list, object);
list_insert_before_node(list, lold, nobject);
}
}
void
list_insert_head(list_t *list, void *object)
{
list_node_t *lold = &list->list_head;
list_insert_after_node(list, lold, object);
}
void
list_insert_tail(list_t *list, void *object)
{
list_node_t *lold = &list->list_head;
list_insert_before_node(list, lold, object);
}
void
list_remove(list_t *list, void *object)
{
list_node_t *lold = list_d2l(list, object);
ASSERT(!list_empty(list));
ASSERT(lold->list_next != NULL);
list_remove_node(lold);
}
void *
list_remove_head(list_t *list)
{
list_node_t *head = list->list_head.list_next;
if (head == &list->list_head)
return (NULL);
list_remove_node(head);
return (list_object(list, head));
}
void *
list_remove_tail(list_t *list)
{
list_node_t *tail = list->list_head.list_prev;
if (tail == &list->list_head)
return (NULL);
list_remove_node(tail);
return (list_object(list, tail));
}
void *
list_head(list_t *list)
{
if (list_empty(list))
return (NULL);
return (list_object(list, list->list_head.list_next));
}
void *
list_tail(list_t *list)
{
if (list_empty(list))
return (NULL);
return (list_object(list, list->list_head.list_prev));
}
void *
list_next(list_t *list, void *object)
{
list_node_t *node = list_d2l(list, object);
if (node->list_next != &list->list_head)
return (list_object(list, node->list_next));
return (NULL);
}
void *
list_prev(list_t *list, void *object)
{
list_node_t *node = list_d2l(list, object);
if (node->list_prev != &list->list_head)
return (list_object(list, node->list_prev));
return (NULL);
}
/*
* Insert src list after dst list. Empty src list thereafter.
*/
void
list_move_tail(list_t *dst, list_t *src)
{
list_node_t *dstnode = &dst->list_head;
list_node_t *srcnode = &src->list_head;
ASSERT(dst->list_size == src->list_size);
ASSERT(dst->list_offset == src->list_offset);
if (list_empty(src))
return;
dstnode->list_prev->list_next = srcnode->list_next;
srcnode->list_next->list_prev = dstnode->list_prev;
dstnode->list_prev = srcnode->list_prev;
srcnode->list_prev->list_next = dstnode;
/* empty src list */
srcnode->list_next = srcnode->list_prev = srcnode;
}
void
list_link_replace(list_node_t *lold, list_node_t *lnew)
{
ASSERT(list_link_active(lold));
ASSERT(!list_link_active(lnew));
lnew->list_next = lold->list_next;
lnew->list_prev = lold->list_prev;
lold->list_prev->list_next = lnew;
lold->list_next->list_prev = lnew;
lold->list_next = lold->list_prev = NULL;
}
void
list_link_init(list_node_t *link)
{
link->list_next = NULL;
link->list_prev = NULL;
}
int
list_link_active(list_node_t *link)
{
return (link->list_next != NULL);
}
int
list_is_empty(list_t *list)
{
return (list_empty(list));
}
+96
View File
@@ -0,0 +1,96 @@
/*
* Copyright 2005 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SHA224_H_
#define _SHA224_H_
#ifndef _KERNEL
#include <sys/types.h>
#endif
#define SHA224_BLOCK_LENGTH 64
#define SHA224_DIGEST_LENGTH 28
#define SHA224_DIGEST_STRING_LENGTH (SHA224_DIGEST_LENGTH * 2 + 1)
typedef struct SHA224Context {
uint32_t state[8];
uint64_t count;
uint8_t buf[SHA224_BLOCK_LENGTH];
} SHA224_CTX;
__BEGIN_DECLS
/* Ensure libmd symbols do not clash with libcrypto */
#ifndef SHA224_Init
#define SHA224_Init _libmd_SHA224_Init
#endif
#ifndef SHA224_Update
#define SHA224_Update _libmd_SHA224_Update
#endif
#ifndef SHA224_Final
#define SHA224_Final _libmd_SHA224_Final
#endif
#ifndef SHA224_End
#define SHA224_End _libmd_SHA224_End
#endif
#ifndef SHA224_Fd
#define SHA224_Fd _libmd_SHA224_Fd
#endif
#ifndef SHA224_FdChunk
#define SHA224_FdChunk _libmd_SHA224_FdChunk
#endif
#ifndef SHA224_File
#define SHA224_File _libmd_SHA224_File
#endif
#ifndef SHA224_FileChunk
#define SHA224_FileChunk _libmd_SHA224_FileChunk
#endif
#ifndef SHA224_Data
#define SHA224_Data _libmd_SHA224_Data
#endif
#ifndef SHA224_version
#define SHA224_version _libmd_SHA224_version
#endif
void SHA224_Init(SHA224_CTX *);
void SHA224_Update(SHA224_CTX *, const void *, size_t);
void SHA224_Final(unsigned char [__min_size(SHA224_DIGEST_LENGTH)],
SHA224_CTX *);
#ifndef _KERNEL
char *SHA224_End(SHA224_CTX *, char *);
char *SHA224_Data(const void *, unsigned int, char *);
char *SHA224_Fd(int, char *);
char *SHA224_FdChunk(int, char *, off_t, off_t);
char *SHA224_File(const char *, char *);
char *SHA224_FileChunk(const char *, char *, off_t, off_t);
#endif
__END_DECLS
#endif /* !_SHA224_H_ */
+99
View File
@@ -0,0 +1,99 @@
/*
* Copyright 2005 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SHA256_H_
#define _SHA256_H_
#ifndef _KERNEL
#include <sys/types.h>
#endif
#define SHA256_BLOCK_LENGTH 64
#define SHA256_DIGEST_LENGTH 32
#define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1)
typedef struct SHA256Context {
uint32_t state[8];
uint64_t count;
uint8_t buf[SHA256_BLOCK_LENGTH];
} SHA256_CTX;
__BEGIN_DECLS
/* Ensure libmd symbols do not clash with libcrypto */
#ifndef SHA256_Init
#define SHA256_Init _libmd_SHA256_Init
#endif
#ifndef SHA256_Update
#define SHA256_Update _libmd_SHA256_Update
#endif
#ifndef SHA256_Final
#define SHA256_Final _libmd_SHA256_Final
#endif
#ifndef SHA256_End
#define SHA256_End _libmd_SHA256_End
#endif
#ifndef SHA256_Fd
#define SHA256_Fd _libmd_SHA256_Fd
#endif
#ifndef SHA256_FdChunk
#define SHA256_FdChunk _libmd_SHA256_FdChunk
#endif
#ifndef SHA256_File
#define SHA256_File _libmd_SHA256_File
#endif
#ifndef SHA256_FileChunk
#define SHA256_FileChunk _libmd_SHA256_FileChunk
#endif
#ifndef SHA256_Data
#define SHA256_Data _libmd_SHA256_Data
#endif
#ifndef SHA256_Transform
#define SHA256_Transform _libmd_SHA256_Transform
#endif
#ifndef SHA256_version
#define SHA256_version _libmd_SHA256_version
#endif
void SHA256_Init(SHA256_CTX *);
void SHA256_Update(SHA256_CTX *, const void *, size_t);
void SHA256_Final(unsigned char [__min_size(SHA256_DIGEST_LENGTH)],
SHA256_CTX *);
#ifndef _KERNEL
char *SHA256_End(SHA256_CTX *, char *);
char *SHA256_Data(const void *, unsigned int, char *);
char *SHA256_Fd(int, char *);
char *SHA256_FdChunk(int, char *, off_t, off_t);
char *SHA256_File(const char *, char *);
char *SHA256_FileChunk(const char *, char *, off_t, off_t);
#endif
__END_DECLS
#endif /* !_SHA256_H_ */
+378
View File
@@ -0,0 +1,378 @@
/*
* Copyright 2005 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#ifdef _KERNEL
#include <sys/systm.h>
#else
#include <string.h>
#endif
#include <sys/byteorder.h>
#include <sys/endian.h>
#include "sha224.h"
#include "sha256.h"
#if BYTE_ORDER == BIG_ENDIAN
/* Copy a vector of big-endian uint32_t into a vector of bytes */
#define be32enc_vect(dst, src, len) \
memcpy((void *)dst, (const void *)src, (size_t)len)
/* Copy a vector of bytes into a vector of big-endian uint32_t */
#define be32dec_vect(dst, src, len) \
memcpy((void *)dst, (const void *)src, (size_t)len)
#else /* BYTE_ORDER != BIG_ENDIAN */
/*
* Encode a length len/4 vector of (uint32_t) into a length len vector of
* (unsigned char) in big-endian form. Assumes len is a multiple of 4.
*/
static void
be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
{
size_t i;
for (i = 0; i < len / 4; i++)
be32enc(dst + i * 4, src[i]);
}
/*
* Decode a big-endian length len vector of (unsigned char) into a length
* len/4 vector of (uint32_t). Assumes len is a multiple of 4.
*/
static void
be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
{
size_t i;
for (i = 0; i < len / 4; i++)
dst[i] = be32dec(src + i * 4);
}
#endif /* BYTE_ORDER != BIG_ENDIAN */
/* SHA256 round constants. */
static const uint32_t K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
/* Elementary functions used by SHA256 */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
#define SHR(x, n) (x >> n)
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
/* SHA256 round function */
#define RND(a, b, c, d, e, f, g, h, k) \
h += S1(e) + Ch(e, f, g) + k; \
d += h; \
h += S0(a) + Maj(a, b, c);
/* Adjusted round function for rotating state */
#define RNDr(S, W, i, ii) \
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
S[(66 - i) % 8], S[(67 - i) % 8], \
S[(68 - i) % 8], S[(69 - i) % 8], \
S[(70 - i) % 8], S[(71 - i) % 8], \
W[i + ii] + K[i + ii])
/* Message schedule computation */
#define MSCH(W, ii, i) \
W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + \
s0(W[i + ii + 1]) + W[i + ii]
/*
* SHA256 block compression function. The 256-bit state is transformed via
* the 512-bit input block to produce a new state.
*/
static void
SHA256_Transform(uint32_t *state, const unsigned char block[64])
{
uint32_t W[64];
uint32_t S[8];
int i;
/* 1. Prepare the first part of the message schedule W. */
be32dec_vect(W, block, 64);
/* 2. Initialize working variables. */
memcpy(S, state, 32);
/* 3. Mix. */
for (i = 0; i < 64; i += 16) {
RNDr(S, W, 0, i);
RNDr(S, W, 1, i);
RNDr(S, W, 2, i);
RNDr(S, W, 3, i);
RNDr(S, W, 4, i);
RNDr(S, W, 5, i);
RNDr(S, W, 6, i);
RNDr(S, W, 7, i);
RNDr(S, W, 8, i);
RNDr(S, W, 9, i);
RNDr(S, W, 10, i);
RNDr(S, W, 11, i);
RNDr(S, W, 12, i);
RNDr(S, W, 13, i);
RNDr(S, W, 14, i);
RNDr(S, W, 15, i);
if (i == 48)
break;
MSCH(W, 0, i);
MSCH(W, 1, i);
MSCH(W, 2, i);
MSCH(W, 3, i);
MSCH(W, 4, i);
MSCH(W, 5, i);
MSCH(W, 6, i);
MSCH(W, 7, i);
MSCH(W, 8, i);
MSCH(W, 9, i);
MSCH(W, 10, i);
MSCH(W, 11, i);
MSCH(W, 12, i);
MSCH(W, 13, i);
MSCH(W, 14, i);
MSCH(W, 15, i);
}
/* 4. Mix local working variables into global state */
for (i = 0; i < 8; i++)
state[i] += S[i];
}
static unsigned char PAD[64] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* Add padding and terminating bit-count. */
static void
SHA256_Pad(SHA256_CTX * ctx)
{
size_t r;
/* Figure out how many bytes we have buffered. */
r = (ctx->count >> 3) & 0x3f;
/* Pad to 56 mod 64, transforming if we finish a block en route. */
if (r < 56) {
/* Pad to 56 mod 64. */
memcpy(&ctx->buf[r], PAD, 56 - r);
} else {
/* Finish the current block and mix. */
memcpy(&ctx->buf[r], PAD, 64 - r);
SHA256_Transform(ctx->state, ctx->buf);
/* The start of the final block is all zeroes. */
memset(&ctx->buf[0], 0, 56);
}
/* Add the terminating bit-count. */
be64enc(&ctx->buf[56], ctx->count);
/* Mix in the final block. */
SHA256_Transform(ctx->state, ctx->buf);
}
/* SHA-256 initialization. Begins a SHA-256 operation. */
void
SHA256_Init(SHA256_CTX * ctx)
{
/* Zero bits processed so far */
ctx->count = 0;
/* Magic initialization constants */
ctx->state[0] = 0x6A09E667;
ctx->state[1] = 0xBB67AE85;
ctx->state[2] = 0x3C6EF372;
ctx->state[3] = 0xA54FF53A;
ctx->state[4] = 0x510E527F;
ctx->state[5] = 0x9B05688C;
ctx->state[6] = 0x1F83D9AB;
ctx->state[7] = 0x5BE0CD19;
}
/* Add bytes into the hash */
void
SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len)
{
uint64_t bitlen;
uint32_t r;
const unsigned char *src = in;
/* Number of bytes left in the buffer from previous updates */
r = (ctx->count >> 3) & 0x3f;
/* Convert the length into a number of bits */
bitlen = len << 3;
/* Update number of bits */
ctx->count += bitlen;
/* Handle the case where we don't need to perform any transforms */
if (len < 64 - r) {
memcpy(&ctx->buf[r], src, len);
return;
}
/* Finish the current block */
memcpy(&ctx->buf[r], src, 64 - r);
SHA256_Transform(ctx->state, ctx->buf);
src += 64 - r;
len -= 64 - r;
/* Perform complete blocks */
while (len >= 64) {
SHA256_Transform(ctx->state, src);
src += 64;
len -= 64;
}
/* Copy left over data into buffer */
memcpy(ctx->buf, src, len);
}
/*
* SHA-256 finalization. Pads the input data, exports the hash value,
* and clears the context state.
*/
void
SHA256_Final(unsigned char digest[static SHA256_DIGEST_LENGTH], SHA256_CTX *ctx)
{
/* Add padding */
SHA256_Pad(ctx);
/* Write the hash */
be32enc_vect(digest, ctx->state, SHA256_DIGEST_LENGTH);
/* Clear the context state */
explicit_bzero(ctx, sizeof (*ctx));
}
/* SHA-224: ******************************************************* */
/*
* the SHA224 and SHA256 transforms are identical
*/
/* SHA-224 initialization. Begins a SHA-224 operation. */
void
SHA224_Init(SHA224_CTX * ctx)
{
/* Zero bits processed so far */
ctx->count = 0;
/* Magic initialization constants */
ctx->state[0] = 0xC1059ED8;
ctx->state[1] = 0x367CD507;
ctx->state[2] = 0x3070DD17;
ctx->state[3] = 0xF70E5939;
ctx->state[4] = 0xFFC00B31;
ctx->state[5] = 0x68581511;
ctx->state[6] = 0x64f98FA7;
ctx->state[7] = 0xBEFA4FA4;
}
/* Add bytes into the SHA-224 hash */
void
SHA224_Update(SHA224_CTX * ctx, const void *in, size_t len)
{
SHA256_Update((SHA256_CTX *)ctx, in, len);
}
/*
* SHA-224 finalization. Pads the input data, exports the hash value,
* and clears the context state.
*/
void
SHA224_Final(unsigned char digest[static SHA224_DIGEST_LENGTH], SHA224_CTX *ctx)
{
/* Add padding */
SHA256_Pad((SHA256_CTX *)ctx);
/* Write the hash */
be32enc_vect(digest, ctx->state, SHA224_DIGEST_LENGTH);
/* Clear the context state */
explicit_bzero(ctx, sizeof (*ctx));
}
#ifdef WEAK_REFS
/*
* When building libmd, provide weak references. Note: this is not
* activated in the context of compiling these sources for internal
* use in libcrypt.
*/
#undef SHA256_Init
__weak_reference(_libmd_SHA256_Init, SHA256_Init);
#undef SHA256_Update
__weak_reference(_libmd_SHA256_Update, SHA256_Update);
#undef SHA256_Final
__weak_reference(_libmd_SHA256_Final, SHA256_Final);
#undef SHA256_Transform
__weak_reference(_libmd_SHA256_Transform, SHA256_Transform);
#undef SHA224_Init
__weak_reference(_libmd_SHA224_Init, SHA224_Init);
#undef SHA224_Update
__weak_reference(_libmd_SHA224_Update, SHA224_Update);
#undef SHA224_Final
__weak_reference(_libmd_SHA224_Final, SHA224_Final);
#endif
+96
View File
@@ -0,0 +1,96 @@
/*
* Copyright 2005 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SHA384_H_
#define _SHA384_H_
#ifndef _KERNEL
#include <sys/types.h>
#endif
#define SHA384_BLOCK_LENGTH 128
#define SHA384_DIGEST_LENGTH 48
#define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1)
typedef struct SHA384Context {
uint64_t state[8];
uint64_t count[2];
uint8_t buf[SHA384_BLOCK_LENGTH];
} SHA384_CTX;
__BEGIN_DECLS
/* Ensure libmd symbols do not clash with libcrypto */
#ifndef SHA384_Init
#define SHA384_Init _libmd_SHA384_Init
#endif
#ifndef SHA384_Update
#define SHA384_Update _libmd_SHA384_Update
#endif
#ifndef SHA384_Final
#define SHA384_Final _libmd_SHA384_Final
#endif
#ifndef SHA384_End
#define SHA384_End _libmd_SHA384_End
#endif
#ifndef SHA384_Fd
#define SHA384_Fd _libmd_SHA384_Fd
#endif
#ifndef SHA384_FdChunk
#define SHA384_FdChunk _libmd_SHA384_FdChunk
#endif
#ifndef SHA384_File
#define SHA384_File _libmd_SHA384_File
#endif
#ifndef SHA384_FileChunk
#define SHA384_FileChunk _libmd_SHA384_FileChunk
#endif
#ifndef SHA384_Data
#define SHA384_Data _libmd_SHA384_Data
#endif
#ifndef SHA384_version
#define SHA384_version _libmd_SHA384_version
#endif
void SHA384_Init(SHA384_CTX *);
void SHA384_Update(SHA384_CTX *, const void *, size_t);
void SHA384_Final(unsigned char [__min_size(SHA384_DIGEST_LENGTH)],
SHA384_CTX *);
#ifndef _KERNEL
char *SHA384_End(SHA384_CTX *, char *);
char *SHA384_Data(const void *, unsigned int, char *);
char *SHA384_Fd(int, char *);
char *SHA384_FdChunk(int, char *, off_t, off_t);
char *SHA384_File(const char *, char *);
char *SHA384_FileChunk(const char *, char *, off_t, off_t);
#endif
__END_DECLS
#endif /* !_SHA384_H_ */
+101
View File
@@ -0,0 +1,101 @@
/*
* Copyright 2005 Colin Percival
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SHA512_H_
#define _SHA512_H_
#ifndef _KERNEL
#include <sys/types.h>
#endif
#define SHA512_BLOCK_LENGTH 128
#define SHA512_DIGEST_LENGTH 64
#define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1)
typedef struct SHA512Context {
uint64_t state[8];
uint64_t count[2];
uint8_t buf[SHA512_BLOCK_LENGTH];
} SHA512_CTX;
__BEGIN_DECLS
/* Ensure libmd symbols do not clash with libcrypto */
#if 0
#ifndef SHA512_Init
#define SHA512_Init _libmd_SHA512_Init
#endif
#ifndef SHA512_Update
#define SHA512_Update _libmd_SHA512_Update
#endif
#ifndef SHA512_Final
#define SHA512_Final _libmd_SHA512_Final
#endif
#endif
#ifndef SHA512_End
#define SHA512_End _libmd_SHA512_End
#endif
#ifndef SHA512_Fd
#define SHA512_Fd _libmd_SHA512_Fd
#endif
#ifndef SHA512_FdChunk
#define SHA512_FdChunk _libmd_SHA512_FdChunk
#endif
#ifndef SHA512_File
#define SHA512_File _libmd_SHA512_File
#endif
#ifndef SHA512_FileChunk
#define SHA512_FileChunk _libmd_SHA512_FileChunk
#endif
#ifndef SHA512_Data
#define SHA512_Data _libmd_SHA512_Data
#endif
#ifndef SHA512_Transform
#define SHA512_Transform _libmd_SHA512_Transform
#endif
#ifndef SHA512_version
#define SHA512_version _libmd_SHA512_version
#endif
void SHA512_Init(SHA512_CTX *);
void SHA512_Update(SHA512_CTX *, const void *, size_t);
void SHA512_Final(unsigned char [__min_size(SHA512_DIGEST_LENGTH)],
SHA512_CTX *);
#ifndef _KERNEL
char *SHA512_End(SHA512_CTX *, char *);
char *SHA512_Data(const void *, unsigned int, char *);
char *SHA512_Fd(int, char *);
char *SHA512_FdChunk(int, char *, off_t, off_t);
char *SHA512_File(const char *, char *);
char *SHA512_FileChunk(const char *, char *, off_t, off_t);
#endif
__END_DECLS
#endif /* !_SHA512_H_ */
+508
View File
@@ -0,0 +1,508 @@
/*
* Copyright 2005 Colin Percival
* Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/endian.h>
#include <sys/types.h>
#ifdef _KERNEL
#include <sys/systm.h>
#else
#include <string.h>
#endif
#include "sha512.h"
#include "sha512t.h"
#include "sha384.h"
#if BYTE_ORDER == BIG_ENDIAN
/* Copy a vector of big-endian uint64_t into a vector of bytes */
#define be64enc_vect(dst, src, len) \
memcpy((void *)dst, (const void *)src, (size_t)len)
/* Copy a vector of bytes into a vector of big-endian uint64_t */
#define be64dec_vect(dst, src, len) \
memcpy((void *)dst, (const void *)src, (size_t)len)
#else /* BYTE_ORDER != BIG_ENDIAN */
/*
* Encode a length len/4 vector of (uint64_t) into a length len vector of
* (unsigned char) in big-endian form. Assumes len is a multiple of 8.
*/
static void
be64enc_vect(unsigned char *dst, const uint64_t *src, size_t len)
{
size_t i;
for (i = 0; i < len / 8; i++)
be64enc(dst + i * 8, src[i]);
}
/*
* Decode a big-endian length len vector of (unsigned char) into a length
* len/4 vector of (uint64_t). Assumes len is a multiple of 8.
*/
static void
be64dec_vect(uint64_t *dst, const unsigned char *src, size_t len)
{
size_t i;
for (i = 0; i < len / 8; i++)
dst[i] = be64dec(src + i * 8);
}
#endif /* BYTE_ORDER != BIG_ENDIAN */
/* SHA512 round constants. */
static const uint64_t K[80] = {
0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
};
/* Elementary functions used by SHA512 */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
#define SHR(x, n) (x >> n)
#define ROTR(x, n) ((x >> n) | (x << (64 - n)))
#define S0(x) (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39))
#define S1(x) (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41))
#define s0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x, 7))
#define s1(x) (ROTR(x, 19) ^ ROTR(x, 61) ^ SHR(x, 6))
/* SHA512 round function */
#define RND(a, b, c, d, e, f, g, h, k) \
h += S1(e) + Ch(e, f, g) + k; \
d += h; \
h += S0(a) + Maj(a, b, c);
/* Adjusted round function for rotating state */
#define RNDr(S, W, i, ii) \
RND(S[(80 - i) % 8], S[(81 - i) % 8], \
S[(82 - i) % 8], S[(83 - i) % 8], \
S[(84 - i) % 8], S[(85 - i) % 8], \
S[(86 - i) % 8], S[(87 - i) % 8], \
W[i + ii] + K[i + ii])
/* Message schedule computation */
#define MSCH(W, ii, i) \
W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + \
s0(W[i + ii + 1]) + W[i + ii]
/*
* SHA512 block compression function. The 512-bit state is transformed via
* the 512-bit input block to produce a new state.
*/
static void
SHA512_Transform(uint64_t *state,
const unsigned char block[SHA512_BLOCK_LENGTH])
{
uint64_t W[80];
uint64_t S[8];
int i;
/* 1. Prepare the first part of the message schedule W. */
be64dec_vect(W, block, SHA512_BLOCK_LENGTH);
/* 2. Initialize working variables. */
memcpy(S, state, SHA512_DIGEST_LENGTH);
/* 3. Mix. */
for (i = 0; i < 80; i += 16) {
RNDr(S, W, 0, i);
RNDr(S, W, 1, i);
RNDr(S, W, 2, i);
RNDr(S, W, 3, i);
RNDr(S, W, 4, i);
RNDr(S, W, 5, i);
RNDr(S, W, 6, i);
RNDr(S, W, 7, i);
RNDr(S, W, 8, i);
RNDr(S, W, 9, i);
RNDr(S, W, 10, i);
RNDr(S, W, 11, i);
RNDr(S, W, 12, i);
RNDr(S, W, 13, i);
RNDr(S, W, 14, i);
RNDr(S, W, 15, i);
if (i == 64)
break;
MSCH(W, 0, i);
MSCH(W, 1, i);
MSCH(W, 2, i);
MSCH(W, 3, i);
MSCH(W, 4, i);
MSCH(W, 5, i);
MSCH(W, 6, i);
MSCH(W, 7, i);
MSCH(W, 8, i);
MSCH(W, 9, i);
MSCH(W, 10, i);
MSCH(W, 11, i);
MSCH(W, 12, i);
MSCH(W, 13, i);
MSCH(W, 14, i);
MSCH(W, 15, i);
}
/* 4. Mix local working variables into global state */
for (i = 0; i < 8; i++)
state[i] += S[i];
}
static unsigned char PAD[SHA512_BLOCK_LENGTH] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* Add padding and terminating bit-count. */
static void
SHA512_Pad(SHA512_CTX * ctx)
{
size_t r;
/* Figure out how many bytes we have buffered. */
r = (ctx->count[1] >> 3) & 0x7f;
/* Pad to 112 mod 128, transforming if we finish a block en route. */
if (r < 112) {
/* Pad to 112 mod 128. */
memcpy(&ctx->buf[r], PAD, 112 - r);
} else {
/* Finish the current block and mix. */
memcpy(&ctx->buf[r], PAD, 128 - r);
SHA512_Transform(ctx->state, ctx->buf);
/* The start of the final block is all zeroes. */
memset(&ctx->buf[0], 0, 112);
}
/* Add the terminating bit-count. */
be64enc_vect(&ctx->buf[112], ctx->count, 16);
/* Mix in the final block. */
SHA512_Transform(ctx->state, ctx->buf);
}
/* SHA-512 initialization. Begins a SHA-512 operation. */
void
SHA512_Init(SHA512_CTX * ctx)
{
/* Zero bits processed so far */
ctx->count[0] = ctx->count[1] = 0;
/* Magic initialization constants */
ctx->state[0] = 0x6a09e667f3bcc908ULL;
ctx->state[1] = 0xbb67ae8584caa73bULL;
ctx->state[2] = 0x3c6ef372fe94f82bULL;
ctx->state[3] = 0xa54ff53a5f1d36f1ULL;
ctx->state[4] = 0x510e527fade682d1ULL;
ctx->state[5] = 0x9b05688c2b3e6c1fULL;
ctx->state[6] = 0x1f83d9abfb41bd6bULL;
ctx->state[7] = 0x5be0cd19137e2179ULL;
}
/* Add bytes into the hash */
void
SHA512_Update(SHA512_CTX * ctx, const void *in, size_t len)
{
uint64_t bitlen[2];
uint64_t r;
const unsigned char *src = in;
/* Number of bytes left in the buffer from previous updates */
r = (ctx->count[1] >> 3) & 0x7f;
/* Convert the length into a number of bits */
bitlen[1] = ((uint64_t)len) << 3;
bitlen[0] = ((uint64_t)len) >> 61;
/* Update number of bits */
if ((ctx->count[1] += bitlen[1]) < bitlen[1])
ctx->count[0]++;
ctx->count[0] += bitlen[0];
/* Handle the case where we don't need to perform any transforms */
if (len < SHA512_BLOCK_LENGTH - r) {
memcpy(&ctx->buf[r], src, len);
return;
}
/* Finish the current block */
memcpy(&ctx->buf[r], src, SHA512_BLOCK_LENGTH - r);
SHA512_Transform(ctx->state, ctx->buf);
src += SHA512_BLOCK_LENGTH - r;
len -= SHA512_BLOCK_LENGTH - r;
/* Perform complete blocks */
while (len >= SHA512_BLOCK_LENGTH) {
SHA512_Transform(ctx->state, src);
src += SHA512_BLOCK_LENGTH;
len -= SHA512_BLOCK_LENGTH;
}
/* Copy left over data into buffer */
memcpy(ctx->buf, src, len);
}
/*
* SHA-512 finalization. Pads the input data, exports the hash value,
* and clears the context state.
*/
void
SHA512_Final(unsigned char digest[static SHA512_DIGEST_LENGTH], SHA512_CTX *ctx)
{
/* Add padding */
SHA512_Pad(ctx);
/* Write the hash */
be64enc_vect(digest, ctx->state, SHA512_DIGEST_LENGTH);
/* Clear the context state */
explicit_bzero(ctx, sizeof (*ctx));
}
/* SHA-512t: ******************************************************** */
/*
* the SHA512t transforms are identical to SHA512 so reuse the existing function
*/
void
SHA512_224_Init(SHA512_CTX * ctx)
{
/* Zero bits processed so far */
ctx->count[0] = ctx->count[1] = 0;
/* Magic initialization constants */
ctx->state[0] = 0x8c3d37c819544da2ULL;
ctx->state[1] = 0x73e1996689dcd4d6ULL;
ctx->state[2] = 0x1dfab7ae32ff9c82ULL;
ctx->state[3] = 0x679dd514582f9fcfULL;
ctx->state[4] = 0x0f6d2b697bd44da8ULL;
ctx->state[5] = 0x77e36f7304c48942ULL;
ctx->state[6] = 0x3f9d85a86a1d36c8ULL;
ctx->state[7] = 0x1112e6ad91d692a1ULL;
}
void
SHA512_224_Update(SHA512_CTX * ctx, const void *in, size_t len)
{
SHA512_Update(ctx, in, len);
}
void
SHA512_224_Final(unsigned char digest[static SHA512_224_DIGEST_LENGTH],
SHA512_CTX *ctx)
{
/* Add padding */
SHA512_Pad(ctx);
/* Write the hash */
be64enc_vect(digest, ctx->state, SHA512_224_DIGEST_LENGTH);
/* Clear the context state */
explicit_bzero(ctx, sizeof (*ctx));
}
void
SHA512_256_Init(SHA512_CTX * ctx)
{
/* Zero bits processed so far */
ctx->count[0] = ctx->count[1] = 0;
/* Magic initialization constants */
ctx->state[0] = 0x22312194fc2bf72cULL;
ctx->state[1] = 0x9f555fa3c84c64c2ULL;
ctx->state[2] = 0x2393b86b6f53b151ULL;
ctx->state[3] = 0x963877195940eabdULL;
ctx->state[4] = 0x96283ee2a88effe3ULL;
ctx->state[5] = 0xbe5e1e2553863992ULL;
ctx->state[6] = 0x2b0199fc2c85b8aaULL;
ctx->state[7] = 0x0eb72ddc81c52ca2ULL;
}
void
SHA512_256_Update(SHA512_CTX * ctx, const void *in, size_t len)
{
SHA512_Update(ctx, in, len);
}
void
SHA512_256_Final(unsigned char digest[static SHA512_256_DIGEST_LENGTH],
SHA512_CTX * ctx)
{
/* Add padding */
SHA512_Pad(ctx);
/* Write the hash */
be64enc_vect(digest, ctx->state, SHA512_256_DIGEST_LENGTH);
/* Clear the context state */
explicit_bzero(ctx, sizeof (*ctx));
}
/* ** SHA-384: ******************************************************** */
/*
* the SHA384 and SHA512 transforms are identical, so SHA384 is skipped
*/
/* SHA-384 initialization. Begins a SHA-384 operation. */
void
SHA384_Init(SHA384_CTX * ctx)
{
/* Zero bits processed so far */
ctx->count[0] = ctx->count[1] = 0;
/* Magic initialization constants */
ctx->state[0] = 0xcbbb9d5dc1059ed8ULL;
ctx->state[1] = 0x629a292a367cd507ULL;
ctx->state[2] = 0x9159015a3070dd17ULL;
ctx->state[3] = 0x152fecd8f70e5939ULL;
ctx->state[4] = 0x67332667ffc00b31ULL;
ctx->state[5] = 0x8eb44a8768581511ULL;
ctx->state[6] = 0xdb0c2e0d64f98fa7ULL;
ctx->state[7] = 0x47b5481dbefa4fa4ULL;
}
/* Add bytes into the SHA-384 hash */
void
SHA384_Update(SHA384_CTX * ctx, const void *in, size_t len)
{
SHA512_Update((SHA512_CTX *)ctx, in, len);
}
/*
* SHA-384 finalization. Pads the input data, exports the hash value,
* and clears the context state.
*/
void
SHA384_Final(unsigned char digest[static SHA384_DIGEST_LENGTH], SHA384_CTX *ctx)
{
/* Add padding */
SHA512_Pad((SHA512_CTX *)ctx);
/* Write the hash */
be64enc_vect(digest, ctx->state, SHA384_DIGEST_LENGTH);
/* Clear the context state */
explicit_bzero(ctx, sizeof (*ctx));
}
#if 0
/*
* When building libmd, provide weak references. Note: this is not
* activated in the context of compiling these sources for internal
* use in libcrypt.
*/
#undef SHA512_Init
__weak_reference(_libmd_SHA512_Init, SHA512_Init);
#undef SHA512_Update
__weak_reference(_libmd_SHA512_Update, SHA512_Update);
#undef SHA512_Final
__weak_reference(_libmd_SHA512_Final, SHA512_Final);
#undef SHA512_Transform
__weak_reference(_libmd_SHA512_Transform, SHA512_Transform);
#undef SHA512_224_Init
__weak_reference(_libmd_SHA512_224_Init, SHA512_224_Init);
#undef SHA512_224_Update
__weak_reference(_libmd_SHA512_224_Update, SHA512_224_Update);
#undef SHA512_224_Final
__weak_reference(_libmd_SHA512_224_Final, SHA512_224_Final);
#undef SHA512_256_Init
__weak_reference(_libmd_SHA512_256_Init, SHA512_256_Init);
#undef SHA512_256_Update
__weak_reference(_libmd_SHA512_256_Update, SHA512_256_Update);
#undef SHA512_256_Final
__weak_reference(_libmd_SHA512_256_Final, SHA512_256_Final);
#undef SHA384_Init
__weak_reference(_libmd_SHA384_Init, SHA384_Init);
#undef SHA384_Update
__weak_reference(_libmd_SHA384_Update, SHA384_Update);
#undef SHA384_Final
__weak_reference(_libmd_SHA384_Final, SHA384_Final);
#endif
+143
View File
@@ -0,0 +1,143 @@
/*
* Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SHA512T_H_
#define _SHA512T_H_
#include "sha512.h"
#ifndef _KERNEL
#include <sys/types.h>
#endif
#define SHA512_224_DIGEST_LENGTH 28
#define SHA512_224_DIGEST_STRING_LENGTH (SHA512_224_DIGEST_LENGTH * 2 + 1)
#define SHA512_256_DIGEST_LENGTH 32
#define SHA512_256_DIGEST_STRING_LENGTH (SHA512_256_DIGEST_LENGTH * 2 + 1)
__BEGIN_DECLS
/* Ensure libmd symbols do not clash with libcrypto */
#ifndef SHA512_224_Init
#define SHA512_224_Init _libmd_SHA512_224_Init
#endif
#ifndef SHA512_224_Update
#define SHA512_224_Update _libmd_SHA512_224_Update
#endif
#ifndef SHA512_224_Final
#define SHA512_224_Final _libmd_SHA512_224_Final
#endif
#ifndef SHA512_224_End
#define SHA512_224_End _libmd_SHA512_224_End
#endif
#ifndef SHA512_224_Fd
#define SHA512_224_Fd _libmd_SHA512_224_Fd
#endif
#ifndef SHA512_224_FdChunk
#define SHA512_224_FdChunk _libmd_SHA512_224_FdChunk
#endif
#ifndef SHA512_224_File
#define SHA512_224_File _libmd_SHA512_224_File
#endif
#ifndef SHA512_224_FileChunk
#define SHA512_224_FileChunk _libmd_SHA512_224_FileChunk
#endif
#ifndef SHA512_224_Data
#define SHA512_224_Data _libmd_SHA512_224_Data
#endif
#ifndef SHA512_224_Transform
#define SHA512_224_Transform _libmd_SHA512_224_Transform
#endif
#ifndef SHA512_224_version
#define SHA512_224_version _libmd_SHA512_224_version
#endif
#ifndef SHA512_256_Init
#define SHA512_256_Init _libmd_SHA512_256_Init
#endif
#ifndef SHA512_256_Update
#define SHA512_256_Update _libmd_SHA512_256_Update
#endif
#ifndef SHA512_256_Final
#define SHA512_256_Final _libmd_SHA512_256_Final
#endif
#ifndef SHA512_256_End
#define SHA512_256_End _libmd_SHA512_256_End
#endif
#ifndef SHA512_256_Fd
#define SHA512_256_Fd _libmd_SHA512_256_Fd
#endif
#ifndef SHA512_256_FdChunk
#define SHA512_256_FdChunk _libmd_SHA512_256_FdChunk
#endif
#ifndef SHA512_256_File
#define SHA512_256_File _libmd_SHA512_256_File
#endif
#ifndef SHA512_256_FileChunk
#define SHA512_256_FileChunk _libmd_SHA512_256_FileChunk
#endif
#ifndef SHA512_256_Data
#define SHA512_256_Data _libmd_SHA512_256_Data
#endif
#ifndef SHA512_256_Transform
#define SHA512_256_Transform _libmd_SHA512_256_Transform
#endif
#ifndef SHA512_256_version
#define SHA512_256_version _libmd_SHA512_256_version
#endif
void SHA512_224_Init(SHA512_CTX *);
void SHA512_224_Update(SHA512_CTX *, const void *, size_t);
void SHA512_224_Final(unsigned char [__min_size(SHA512_224_DIGEST_LENGTH)],
SHA512_CTX *);
#ifndef _KERNEL
char *SHA512_224_End(SHA512_CTX *, char *);
char *SHA512_224_Data(const void *, unsigned int, char *);
char *SHA512_224_Fd(int, char *);
char *SHA512_224_FdChunk(int, char *, off_t, off_t);
char *SHA512_224_File(const char *, char *);
char *SHA512_224_FileChunk(const char *, char *, off_t, off_t);
#endif
void SHA512_256_Init(SHA512_CTX *);
void SHA512_256_Update(SHA512_CTX *, const void *, size_t);
void SHA512_256_Final(unsigned char [__min_size(SHA512_256_DIGEST_LENGTH)],
SHA512_CTX *);
#ifndef _KERNEL
char *SHA512_256_End(SHA512_CTX *, char *);
char *SHA512_256_Data(const void *, unsigned int, char *);
char *SHA512_256_Fd(int, char *);
char *SHA512_256_FdChunk(int, char *, off_t, off_t);
char *SHA512_256_File(const char *, char *);
char *SHA512_256_FileChunk(const char *, char *, off_t, off_t);
#endif
__END_DECLS
#endif /* !_SHA512T_H_ */
+222
View File
@@ -0,0 +1,222 @@
/*
* Copyright (c) 2008, 2009 Edward Tomasz Napierała <trasz@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/errno.h>
#include <sys/zfs_acl.h>
#include <sys/acl.h>
struct zfs2bsd {
uint32_t zb_zfs;
int zb_bsd;
};
struct zfs2bsd perms[] = {{ACE_READ_DATA, ACL_READ_DATA},
{ACE_WRITE_DATA, ACL_WRITE_DATA},
{ACE_EXECUTE, ACL_EXECUTE},
{ACE_APPEND_DATA, ACL_APPEND_DATA},
{ACE_DELETE_CHILD, ACL_DELETE_CHILD},
{ACE_DELETE, ACL_DELETE},
{ACE_READ_ATTRIBUTES, ACL_READ_ATTRIBUTES},
{ACE_WRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES},
{ACE_READ_NAMED_ATTRS, ACL_READ_NAMED_ATTRS},
{ACE_WRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS},
{ACE_READ_ACL, ACL_READ_ACL},
{ACE_WRITE_ACL, ACL_WRITE_ACL},
{ACE_WRITE_OWNER, ACL_WRITE_OWNER},
{ACE_SYNCHRONIZE, ACL_SYNCHRONIZE},
{0, 0}};
struct zfs2bsd flags[] = {{ACE_FILE_INHERIT_ACE,
ACL_ENTRY_FILE_INHERIT},
{ACE_DIRECTORY_INHERIT_ACE,
ACL_ENTRY_DIRECTORY_INHERIT},
{ACE_NO_PROPAGATE_INHERIT_ACE,
ACL_ENTRY_NO_PROPAGATE_INHERIT},
{ACE_INHERIT_ONLY_ACE,
ACL_ENTRY_INHERIT_ONLY},
{ACE_INHERITED_ACE,
ACL_ENTRY_INHERITED},
{ACE_SUCCESSFUL_ACCESS_ACE_FLAG,
ACL_ENTRY_SUCCESSFUL_ACCESS},
{ACE_FAILED_ACCESS_ACE_FLAG,
ACL_ENTRY_FAILED_ACCESS},
{0, 0}};
static int
_bsd_from_zfs(uint32_t zfs, const struct zfs2bsd *table)
{
const struct zfs2bsd *tmp;
int bsd = 0;
for (tmp = table; tmp->zb_zfs != 0; tmp++) {
if (zfs & tmp->zb_zfs)
bsd |= tmp->zb_bsd;
}
return (bsd);
}
static uint32_t
_zfs_from_bsd(int bsd, const struct zfs2bsd *table)
{
const struct zfs2bsd *tmp;
uint32_t zfs = 0;
for (tmp = table; tmp->zb_bsd != 0; tmp++) {
if (bsd & tmp->zb_bsd)
zfs |= tmp->zb_zfs;
}
return (zfs);
}
int
acl_from_aces(struct acl *aclp, const ace_t *aces, int nentries)
{
int i;
struct acl_entry *entry;
const ace_t *ace;
if (nentries < 1) {
printf("acl_from_aces: empty ZFS ACL; returning EINVAL.\n");
return (EINVAL);
}
if (nentries > ACL_MAX_ENTRIES) {
/*
* I believe it may happen only when moving a pool
* from SunOS to FreeBSD.
*/
printf("acl_from_aces: ZFS ACL too big to fit "
"into 'struct acl'; returning EINVAL.\n");
return (EINVAL);
}
bzero(aclp, sizeof (*aclp));
aclp->acl_maxcnt = ACL_MAX_ENTRIES;
aclp->acl_cnt = nentries;
for (i = 0; i < nentries; i++) {
entry = &(aclp->acl_entry[i]);
ace = &(aces[i]);
if (ace->a_flags & ACE_OWNER)
entry->ae_tag = ACL_USER_OBJ;
else if (ace->a_flags & ACE_GROUP)
entry->ae_tag = ACL_GROUP_OBJ;
else if (ace->a_flags & ACE_EVERYONE)
entry->ae_tag = ACL_EVERYONE;
else if (ace->a_flags & ACE_IDENTIFIER_GROUP)
entry->ae_tag = ACL_GROUP;
else
entry->ae_tag = ACL_USER;
if (entry->ae_tag == ACL_USER || entry->ae_tag == ACL_GROUP)
entry->ae_id = ace->a_who;
else
entry->ae_id = ACL_UNDEFINED_ID;
entry->ae_perm = _bsd_from_zfs(ace->a_access_mask, perms);
entry->ae_flags = _bsd_from_zfs(ace->a_flags, flags);
switch (ace->a_type) {
case ACE_ACCESS_ALLOWED_ACE_TYPE:
entry->ae_entry_type = ACL_ENTRY_TYPE_ALLOW;
break;
case ACE_ACCESS_DENIED_ACE_TYPE:
entry->ae_entry_type = ACL_ENTRY_TYPE_DENY;
break;
case ACE_SYSTEM_AUDIT_ACE_TYPE:
entry->ae_entry_type = ACL_ENTRY_TYPE_AUDIT;
break;
case ACE_SYSTEM_ALARM_ACE_TYPE:
entry->ae_entry_type = ACL_ENTRY_TYPE_ALARM;
break;
default:
panic("acl_from_aces: a_type is 0x%x", ace->a_type);
}
}
return (0);
}
void
aces_from_acl(ace_t *aces, int *nentries, const struct acl *aclp)
{
int i;
const struct acl_entry *entry;
ace_t *ace;
bzero(aces, sizeof (*aces) * aclp->acl_cnt);
*nentries = aclp->acl_cnt;
for (i = 0; i < aclp->acl_cnt; i++) {
entry = &(aclp->acl_entry[i]);
ace = &(aces[i]);
ace->a_who = entry->ae_id;
if (entry->ae_tag == ACL_USER_OBJ)
ace->a_flags = ACE_OWNER;
else if (entry->ae_tag == ACL_GROUP_OBJ)
ace->a_flags = (ACE_GROUP | ACE_IDENTIFIER_GROUP);
else if (entry->ae_tag == ACL_GROUP)
ace->a_flags = ACE_IDENTIFIER_GROUP;
else if (entry->ae_tag == ACL_EVERYONE)
ace->a_flags = ACE_EVERYONE;
else /* ACL_USER */
ace->a_flags = 0;
ace->a_access_mask = _zfs_from_bsd(entry->ae_perm, perms);
ace->a_flags |= _zfs_from_bsd(entry->ae_flags, flags);
switch (entry->ae_entry_type) {
case ACL_ENTRY_TYPE_ALLOW:
ace->a_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
break;
case ACL_ENTRY_TYPE_DENY:
ace->a_type = ACE_ACCESS_DENIED_ACE_TYPE;
break;
case ACL_ENTRY_TYPE_ALARM:
ace->a_type = ACE_SYSTEM_ALARM_ACE_TYPE;
break;
case ACL_ENTRY_TYPE_AUDIT:
ace->a_type = ACE_SYSTEM_AUDIT_ACE_TYPE;
break;
default:
panic("aces_from_acl: ae_entry_type is 0x%x",
entry->ae_entry_type);
}
}
}
+138
View File
@@ -0,0 +1,138 @@
/*
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#ifdef _KERNEL
#include <sys/kernel.h>
struct mtx atomic_mtx;
MTX_SYSINIT(atomic, &atomic_mtx, "atomic", MTX_DEF);
#else
#include <pthread.h>
#define mtx_lock(lock) pthread_mutex_lock(lock)
#define mtx_unlock(lock) pthread_mutex_unlock(lock)
static pthread_mutex_t atomic_mtx;
static __attribute__((constructor)) void
atomic_init(void)
{
pthread_mutex_init(&atomic_mtx, NULL);
}
#endif
#if !defined(__LP64__) && !defined(__mips_n32) && \
!defined(ARM_HAVE_ATOMIC64) && !defined(I386_HAVE_ATOMIC64)
void
atomic_add_64(volatile uint64_t *target, int64_t delta)
{
mtx_lock(&atomic_mtx);
*target += delta;
mtx_unlock(&atomic_mtx);
}
void
atomic_dec_64(volatile uint64_t *target)
{
mtx_lock(&atomic_mtx);
*target -= 1;
mtx_unlock(&atomic_mtx);
}
#endif
uint64_t
atomic_add_64_nv(volatile uint64_t *target, int64_t delta)
{
uint64_t newval;
mtx_lock(&atomic_mtx);
newval = (*target += delta);
mtx_unlock(&atomic_mtx);
return (newval);
}
#if defined(__powerpc__) || defined(__arm__) || defined(__mips__)
void
atomic_or_8(volatile uint8_t *target, uint8_t value)
{
mtx_lock(&atomic_mtx);
*target |= value;
mtx_unlock(&atomic_mtx);
}
#endif
uint8_t
atomic_or_8_nv(volatile uint8_t *target, uint8_t value)
{
uint8_t newval;
mtx_lock(&atomic_mtx);
newval = (*target |= value);
mtx_unlock(&atomic_mtx);
return (newval);
}
uint64_t
atomic_cas_64(volatile uint64_t *target, uint64_t cmp, uint64_t newval)
{
uint64_t oldval;
mtx_lock(&atomic_mtx);
oldval = *target;
if (oldval == cmp)
*target = newval;
mtx_unlock(&atomic_mtx);
return (oldval);
}
uint32_t
atomic_cas_32(volatile uint32_t *target, uint32_t cmp, uint32_t newval)
{
uint32_t oldval;
mtx_lock(&atomic_mtx);
oldval = *target;
if (oldval == cmp)
*target = newval;
mtx_unlock(&atomic_mtx);
return (oldval);
}
void
membar_producer(void)
{
/* nothing */
}
+74
View File
@@ -0,0 +1,74 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* $FreeBSD$
*/
/*
* Copyright 2007 John Birrell <jb@FreeBSD.org>. All rights reserved.
* Copyright 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
#include <sys/cmn_err.h>
void
vcmn_err(int ce, const char *fmt, va_list adx)
{
char buf[256];
const char *prefix;
prefix = NULL; /* silence unwitty compilers */
switch (ce) {
case CE_CONT:
prefix = "Solaris(cont): ";
break;
case CE_NOTE:
prefix = "Solaris: NOTICE: ";
break;
case CE_WARN:
prefix = "Solaris: WARNING: ";
break;
case CE_PANIC:
prefix = "Solaris(panic): ";
break;
case CE_IGNORE:
break;
default:
panic("Solaris: unknown severity level");
}
if (ce == CE_PANIC) {
vsnprintf(buf, sizeof (buf), fmt, adx);
panic("%s%s", prefix, buf);
}
if (ce != CE_IGNORE) {
printf("%s", prefix);
vprintf(fmt, adx);
printf("\n");
}
}
void
cmn_err(int type, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
vcmn_err(type, fmt, ap);
va_end(ap);
}
+37
View File
@@ -0,0 +1,37 @@
/*
* Copyright 2014 The FreeBSD Project.
* All rights reserved.
*
* This software was developed by Steven Hartland.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/sdt.h>
/* CSTYLED */
SDT_PROBE_DEFINE1(sdt, , , set__error, "int");
+351
View File
@@ -0,0 +1,351 @@
/*
* Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/byteorder.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/debug.h>
#include <sys/mutex.h>
#include <sys/vmmeter.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#ifdef KMEM_DEBUG
#include <sys/queue.h>
#include <sys/stack.h>
#endif
#ifdef _KERNEL
MALLOC_DEFINE(M_SOLARIS, "solaris", "Solaris");
#else
#define malloc(size, type, flags) malloc(size)
#define free(addr, type) free(addr)
#endif
#ifdef KMEM_DEBUG
struct kmem_item {
struct stack stack;
LIST_ENTRY(kmem_item) next;
};
static LIST_HEAD(, kmem_item) kmem_items;
static struct mtx kmem_items_mtx;
MTX_SYSINIT(kmem_items_mtx, &kmem_items_mtx, "kmem_items", MTX_DEF);
#endif /* KMEM_DEBUG */
#include <sys/vmem.h>
void *
zfs_kmem_alloc(size_t size, int kmflags)
{
void *p;
#ifdef KMEM_DEBUG
struct kmem_item *i;
size += sizeof (struct kmem_item);
#endif
p = malloc(MAX(size, 16), M_SOLARIS, kmflags);
#ifndef _KERNEL
if (kmflags & KM_SLEEP)
assert(p != NULL);
#endif
#ifdef KMEM_DEBUG
if (p != NULL) {
i = p;
p = (uint8_t *)p + sizeof (struct kmem_item);
stack_save(&i->stack);
mtx_lock(&kmem_items_mtx);
LIST_INSERT_HEAD(&kmem_items, i, next);
mtx_unlock(&kmem_items_mtx);
}
#endif
return (p);
}
void
zfs_kmem_free(void *buf, size_t size __unused)
{
#ifdef KMEM_DEBUG
if (buf == NULL) {
printf("%s: attempt to free NULL\n", __func__);
return;
}
struct kmem_item *i;
buf = (uint8_t *)buf - sizeof (struct kmem_item);
mtx_lock(&kmem_items_mtx);
LIST_FOREACH(i, &kmem_items, next) {
if (i == buf)
break;
}
ASSERT(i != NULL);
LIST_REMOVE(i, next);
mtx_unlock(&kmem_items_mtx);
memset(buf, 0xDC, MAX(size, 16));
#endif
free(buf, M_SOLARIS);
}
static uint64_t kmem_size_val;
static void
kmem_size_init(void *unused __unused)
{
kmem_size_val = (uint64_t)vm_cnt.v_page_count * PAGE_SIZE;
if (kmem_size_val > vm_kmem_size)
kmem_size_val = vm_kmem_size;
}
SYSINIT(kmem_size_init, SI_SUB_KMEM, SI_ORDER_ANY, kmem_size_init, NULL);
uint64_t
kmem_size(void)
{
return (kmem_size_val);
}
static int
kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
{
struct kmem_cache *cache = private;
return (cache->kc_constructor(mem, cache->kc_private, flags));
}
static void
kmem_std_destructor(void *mem, int size __unused, void *private)
{
struct kmem_cache *cache = private;
cache->kc_destructor(mem, cache->kc_private);
}
kmem_cache_t *
kmem_cache_create(char *name, size_t bufsize, size_t align,
int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags)
{
kmem_cache_t *cache;
ASSERT(vmp == NULL);
cache = kmem_alloc(sizeof (*cache), KM_SLEEP);
strlcpy(cache->kc_name, name, sizeof (cache->kc_name));
cache->kc_constructor = constructor;
cache->kc_destructor = destructor;
cache->kc_private = private;
#if defined(_KERNEL) && !defined(KMEM_DEBUG)
cache->kc_zone = uma_zcreate(cache->kc_name, bufsize,
constructor != NULL ? kmem_std_constructor : NULL,
destructor != NULL ? kmem_std_destructor : NULL,
NULL, NULL, align > 0 ? align - 1 : 0, cflags);
#else
cache->kc_size = bufsize;
#endif
return (cache);
}
void
kmem_cache_destroy(kmem_cache_t *cache)
{
#if defined(_KERNEL) && !defined(KMEM_DEBUG)
uma_zdestroy(cache->kc_zone);
#endif
kmem_free(cache, sizeof (*cache));
}
void *
kmem_cache_alloc(kmem_cache_t *cache, int flags)
{
#if defined(_KERNEL) && !defined(KMEM_DEBUG)
return (uma_zalloc_arg(cache->kc_zone, cache, flags));
#else
void *p;
p = kmem_alloc(cache->kc_size, flags);
if (p != NULL && cache->kc_constructor != NULL)
kmem_std_constructor(p, cache->kc_size, cache, flags);
return (p);
#endif
}
void
kmem_cache_free(kmem_cache_t *cache, void *buf)
{
#if defined(_KERNEL) && !defined(KMEM_DEBUG)
uma_zfree_arg(cache->kc_zone, buf, cache);
#else
if (cache->kc_destructor != NULL)
kmem_std_destructor(buf, cache->kc_size, cache);
kmem_free(buf, cache->kc_size);
#endif
}
/*
* Allow our caller to determine if there are running reaps.
*
* This call is very conservative and may return B_TRUE even when
* reaping activity isn't active. If it returns B_FALSE, then reaping
* activity is definitely inactive.
*/
boolean_t
kmem_cache_reap_active(void)
{
return (B_FALSE);
}
/*
* Reap (almost) everything soon.
*
* Note: this does not wait for the reap-tasks to complete. Caller
* should use kmem_cache_reap_active() (above) and/or moderation to
* avoid scheduling too many reap-tasks.
*/
#ifdef _KERNEL
void
kmem_cache_reap_soon(kmem_cache_t *cache)
{
#ifndef KMEM_DEBUG
#if __FreeBSD_version >= 1300043
uma_zone_reclaim(cache->kc_zone, UMA_RECLAIM_DRAIN);
#else
zone_drain(cache->kc_zone);
#endif
#endif
}
void
kmem_reap(void)
{
#if __FreeBSD_version >= 1300043
uma_reclaim(UMA_RECLAIM_TRIM);
#else
uma_reclaim();
#endif
}
#else
void
kmem_cache_reap_soon(kmem_cache_t *cache __unused)
{
}
void
kmem_reap(void)
{
}
#endif
int
kmem_debugging(void)
{
return (0);
}
void *
calloc(size_t n, size_t s)
{
return (kmem_zalloc(n * s, KM_NOSLEEP));
}
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
char *msg;
va_list adx2;
va_copy(adx2, adx);
msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP);
(void) vsprintf(msg, fmt, adx2);
va_end(adx2);
return (msg);
}
#include <vm/uma.h>
#include <vm/uma_int.h>
#ifdef KMEM_DEBUG
#error "KMEM_DEBUG not currently supported"
#endif
uint64_t
spl_kmem_cache_inuse(kmem_cache_t *cache)
{
return (uma_zone_get_cur(cache->kc_zone));
}
uint64_t
spl_kmem_cache_entry_size(kmem_cache_t *cache)
{
return (cache->kc_zone->uz_size);
}
/*
* Register a move callback for cache defragmentation.
* XXX: Unimplemented but harmless to stub out for now.
*/
void
spl_kmem_cache_set_move(kmem_cache_t *skc,
kmem_cbrc_t (move)(void *, void *, size_t, void *))
{
ASSERT(move != NULL);
}
#ifdef KMEM_DEBUG
void kmem_show(void *);
void
kmem_show(void *dummy __unused)
{
struct kmem_item *i;
mtx_lock(&kmem_items_mtx);
if (LIST_EMPTY(&kmem_items))
printf("KMEM_DEBUG: No leaked elements.\n");
else {
printf("KMEM_DEBUG: Leaked elements:\n\n");
LIST_FOREACH(i, &kmem_items, next) {
printf("address=%p\n", i);
stack_print_ddb(&i->stack);
printf("\n");
}
}
mtx_unlock(&kmem_items_mtx);
}
SYSUNINIT(sol_kmem, SI_SUB_CPU, SI_ORDER_FIRST, kmem_show, NULL);
#endif /* KMEM_DEBUG */
+321
View File
@@ -0,0 +1,321 @@
/*
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/kstat.h>
static MALLOC_DEFINE(M_KSTAT, "kstat_data", "Kernel statistics");
SYSCTL_ROOT_NODE(OID_AUTO, kstat, CTLFLAG_RW, 0, "Kernel statistics");
void
__kstat_set_raw_ops(kstat_t *ksp,
int (*headers)(char *buf, size_t size),
int (*data)(char *buf, size_t size, void *data),
void *(*addr)(kstat_t *ksp, loff_t index))
{
ksp->ks_raw_ops.headers = headers;
ksp->ks_raw_ops.data = data;
ksp->ks_raw_ops.addr = addr;
}
static int
kstat_default_update(kstat_t *ksp, int rw)
{
ASSERT(ksp != NULL);
if (rw == KSTAT_WRITE)
return (EACCES);
return (0);
}
kstat_t *
__kstat_create(const char *module, int instance, const char *name,
const char *class, uchar_t ks_type, uint_t ks_ndata, uchar_t flags)
{
struct sysctl_oid *root;
kstat_t *ksp;
KASSERT(instance == 0, ("instance=%d", instance));
if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
ASSERT(ks_ndata == 1);
/*
* Allocate the main structure. We don't need to copy module/class/name
* stuff in here, because it is only used for sysctl node creation
* done in this function.
*/
ksp = malloc(sizeof (*ksp), M_KSTAT, M_WAITOK|M_ZERO);
ksp->ks_crtime = gethrtime();
ksp->ks_snaptime = ksp->ks_crtime;
ksp->ks_instance = instance;
strncpy(ksp->ks_name, name, KSTAT_STRLEN);
strncpy(ksp->ks_class, class, KSTAT_STRLEN);
ksp->ks_type = ks_type;
ksp->ks_flags = flags;
ksp->ks_update = kstat_default_update;
switch (ksp->ks_type) {
case KSTAT_TYPE_RAW:
ksp->ks_ndata = 1;
ksp->ks_data_size = ks_ndata;
break;
case KSTAT_TYPE_NAMED:
ksp->ks_ndata = ks_ndata;
ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t);
break;
case KSTAT_TYPE_INTR:
ksp->ks_ndata = ks_ndata;
ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t);
break;
case KSTAT_TYPE_IO:
ksp->ks_ndata = ks_ndata;
ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t);
break;
case KSTAT_TYPE_TIMER:
ksp->ks_ndata = ks_ndata;
ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t);
break;
default:
panic("Undefined kstat type %d\n", ksp->ks_type);
}
if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) {
ksp->ks_data = NULL;
} else {
ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
if (ksp->ks_data == NULL) {
kmem_free(ksp, sizeof (*ksp));
ksp = NULL;
}
}
/*
* Create sysctl tree for those statistics:
*
* kstat.<module>.<class>.<name>.
*/
sysctl_ctx_init(&ksp->ks_sysctl_ctx);
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
SYSCTL_STATIC_CHILDREN(_kstat), OID_AUTO, module, CTLFLAG_RW, 0,
"");
if (root == NULL) {
printf("%s: Cannot create kstat.%s tree!\n", __func__, module);
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
return (NULL);
}
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
OID_AUTO, class, CTLFLAG_RW, 0, "");
if (root == NULL) {
printf("%s: Cannot create kstat.%s.%s tree!\n", __func__,
module, class);
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
return (NULL);
}
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
OID_AUTO, name, CTLFLAG_RW, 0, "");
if (root == NULL) {
printf("%s: Cannot create kstat.%s.%s.%s tree!\n", __func__,
module, class, name);
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
return (NULL);
}
ksp->ks_sysctl_root = root;
return (ksp);
}
static int
kstat_sysctl(SYSCTL_HANDLER_ARGS)
{
kstat_named_t *ksent = arg1;
uint64_t val;
val = ksent->value.ui64;
return (sysctl_handle_64(oidp, &val, 0, req));
}
void
kstat_install(kstat_t *ksp)
{
kstat_named_t *ksent;
char *namelast;
int typelast;
ksent = ksp->ks_data;
if (ksp->ks_ndata == UINT32_MAX) {
#ifdef INVARIANTS
printf("can't handle raw ops yet!!!\n");
#endif
return;
}
if (ksent == NULL) {
printf("%s ksp->ks_data == NULL!!!!\n", __func__);
return;
}
typelast = 0;
namelast = NULL;
for (int i = 0; i < ksp->ks_ndata; i++, ksent++) {
if (ksent->data_type != 0) {
typelast = ksent->data_type;
namelast = ksent->name;
}
switch (typelast) {
case KSTAT_DATA_INT32:
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, namelast,
CTLTYPE_S32 | CTLFLAG_RD, ksent,
sizeof (*ksent), kstat_sysctl, "I",
namelast);
break;
case KSTAT_DATA_UINT32:
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, namelast,
CTLTYPE_U32 | CTLFLAG_RD, ksent,
sizeof (*ksent), kstat_sysctl, "IU",
namelast);
break;
case KSTAT_DATA_INT64:
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, namelast,
CTLTYPE_S64 | CTLFLAG_RD, ksent,
sizeof (*ksent), kstat_sysctl, "Q",
namelast);
break;
case KSTAT_DATA_UINT64:
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
OID_AUTO, namelast,
CTLTYPE_U64 | CTLFLAG_RD, ksent,
sizeof (*ksent), kstat_sysctl, "QU",
namelast);
break;
default:
panic("unsupported type: %d", typelast);
}
}
}
void
kstat_delete(kstat_t *ksp)
{
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
free(ksp, M_KSTAT);
}
void
kstat_set_string(char *dst, const char *src)
{
bzero(dst, KSTAT_STRLEN);
(void) strncpy(dst, src, KSTAT_STRLEN - 1);
}
void
kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
{
kstat_set_string(knp->name, name);
knp->data_type = data_type;
}
void
kstat_waitq_enter(kstat_io_t *kiop)
{
hrtime_t new, delta;
ulong_t wcnt;
new = gethrtime();
delta = new - kiop->wlastupdate;
kiop->wlastupdate = new;
wcnt = kiop->wcnt++;
if (wcnt != 0) {
kiop->wlentime += delta * wcnt;
kiop->wtime += delta;
}
}
void
kstat_waitq_exit(kstat_io_t *kiop)
{
hrtime_t new, delta;
ulong_t wcnt;
new = gethrtime();
delta = new - kiop->wlastupdate;
kiop->wlastupdate = new;
wcnt = kiop->wcnt--;
ASSERT((int)wcnt > 0);
kiop->wlentime += delta * wcnt;
kiop->wtime += delta;
}
void
kstat_runq_enter(kstat_io_t *kiop)
{
hrtime_t new, delta;
ulong_t rcnt;
new = gethrtime();
delta = new - kiop->rlastupdate;
kiop->rlastupdate = new;
rcnt = kiop->rcnt++;
if (rcnt != 0) {
kiop->rlentime += delta * rcnt;
kiop->rtime += delta;
}
}
void
kstat_runq_exit(kstat_io_t *kiop)
{
hrtime_t new, delta;
ulong_t rcnt;
new = gethrtime();
delta = new - kiop->rlastupdate;
kiop->rlastupdate = new;
rcnt = kiop->rcnt--;
ASSERT((int)rcnt > 0);
kiop->rlentime += delta * rcnt;
kiop->rtime += delta;
}
+107
View File
@@ -0,0 +1,107 @@
/*
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/misc.h>
#include <sys/sysctl.h>
#include <sys/zfs_context.h>
char hw_serial[11] = "0";
static struct opensolaris_utsname hw_utsname = {
.machine = MACHINE
};
static void
opensolaris_utsname_init(void *arg)
{
hw_utsname.sysname = ostype;
hw_utsname.nodename = prison0.pr_hostname;
hw_utsname.release = osrelease;
snprintf(hw_utsname.version, sizeof (hw_utsname.version),
"%d", osreldate);
}
char *
kmem_strdup(const char *s)
{
char *buf;
buf = kmem_alloc(strlen(s) + 1, KM_SLEEP);
strcpy(buf, s);
return (buf);
}
int
ddi_copyin(const void *from, void *to, size_t len, int flags)
{
/* Fake ioctl() issued by kernel, 'from' is a kernel address */
if (flags & FKIOCTL) {
memcpy(to, from, len);
return (0);
}
return (copyin(from, to, len));
}
int
ddi_copyout(const void *from, void *to, size_t len, int flags)
{
/* Fake ioctl() issued by kernel, 'from' is a kernel address */
if (flags & FKIOCTL) {
memcpy(to, from, len);
return (0);
}
return (copyout(from, to, len));
}
int
spl_panic(const char *file, const char *func, int line, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
vpanic(fmt, ap);
va_end(ap);
}
utsname_t *
utsname(void)
{
return (&hw_utsname);
}
SYSINIT(opensolaris_utsname_init, SI_SUB_TUNABLES, SI_ORDER_ANY,
opensolaris_utsname_init, NULL);
+429
View File
@@ -0,0 +1,429 @@
/*
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/priv.h>
#include <sys/vnode.h>
#include <sys/mntent.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/jail.h>
#include <sys/policy.h>
#include <sys/zfs_vfsops.h>
int
secpolicy_nfs(cred_t *cr)
{
return (spl_priv_check_cred(cr, PRIV_NFS_DAEMON));
}
int
secpolicy_zfs(cred_t *cr)
{
return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT));
}
int
secpolicy_sys_config(cred_t *cr, int checkonly __unused)
{
return (spl_priv_check_cred(cr, PRIV_ZFS_POOL_CONFIG));
}
int
secpolicy_zinject(cred_t *cr)
{
return (spl_priv_check_cred(cr, PRIV_ZFS_INJECT));
}
int
secpolicy_fs_unmount(cred_t *cr, struct mount *vfsp __unused)
{
return (spl_priv_check_cred(cr, PRIV_VFS_UNMOUNT));
}
int
secpolicy_fs_owner(struct mount *mp, cred_t *cr)
{
if (zfs_super_owner) {
if (cr->cr_uid == mp->mnt_cred->cr_uid &&
cr->cr_prison == mp->mnt_cred->cr_prison) {
return (0);
}
}
return (EPERM);
}
/*
* This check is done in kern_link(), so we could just return 0 here.
*/
extern int hardlink_check_uid;
int
secpolicy_basic_link(vnode_t *vp, cred_t *cr)
{
if (!hardlink_check_uid)
return (0);
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return (0);
return (spl_priv_check_cred(cr, PRIV_VFS_LINK));
}
int
secpolicy_vnode_stky_modify(cred_t *cr)
{
return (EPERM);
}
int
secpolicy_vnode_remove(vnode_t *vp, cred_t *cr)
{
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return (0);
return (spl_priv_check_cred(cr, PRIV_VFS_ADMIN));
}
int
secpolicy_vnode_access(cred_t *cr, vnode_t *vp, uid_t owner, accmode_t accmode)
{
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return (0);
if ((accmode & VREAD) && spl_priv_check_cred(cr, PRIV_VFS_READ) != 0)
return (EACCES);
if ((accmode & VWRITE) &&
spl_priv_check_cred(cr, PRIV_VFS_WRITE) != 0) {
return (EACCES);
}
if (accmode & VEXEC) {
if (vp->v_type == VDIR) {
if (spl_priv_check_cred(cr, PRIV_VFS_LOOKUP) != 0)
return (EACCES);
} else {
if (spl_priv_check_cred(cr, PRIV_VFS_EXEC) != 0)
return (EACCES);
}
}
return (0);
}
/*
* Like secpolicy_vnode_access() but we get the actual wanted mode and the
* current mode of the file, not the missing bits.
*/
int
secpolicy_vnode_access2(cred_t *cr, vnode_t *vp, uid_t owner,
accmode_t curmode, accmode_t wantmode)
{
accmode_t mode;
mode = ~curmode & wantmode;
if (mode == 0)
return (0);
return (secpolicy_vnode_access(cr, vp, owner, mode));
}
int
secpolicy_vnode_any_access(cred_t *cr, vnode_t *vp, uid_t owner)
{
static int privs[] = {
PRIV_VFS_ADMIN,
PRIV_VFS_READ,
PRIV_VFS_WRITE,
PRIV_VFS_EXEC,
PRIV_VFS_LOOKUP
};
int i;
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return (0);
/* Same as secpolicy_vnode_setdac */
if (owner == cr->cr_uid)
return (0);
for (i = 0; i < sizeof (privs)/sizeof (int); i++) {
int priv;
switch (priv = privs[i]) {
case PRIV_VFS_EXEC:
if (vp->v_type == VDIR)
continue;
break;
case PRIV_VFS_LOOKUP:
if (vp->v_type != VDIR)
continue;
break;
}
if (spl_priv_check_cred(cr, priv) == 0)
return (0);
}
return (EPERM);
}
int
secpolicy_vnode_setdac(vnode_t *vp, cred_t *cr, uid_t owner)
{
if (owner == cr->cr_uid)
return (0);
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return (0);
return (spl_priv_check_cred(cr, PRIV_VFS_ADMIN));
}
int
secpolicy_vnode_setattr(cred_t *cr, vnode_t *vp, struct vattr *vap,
const struct vattr *ovap, int flags,
int unlocked_access(void *, int, cred_t *), void *node)
{
int mask = vap->va_mask;
int error;
if (mask & AT_SIZE) {
if (vp->v_type == VDIR)
return (EISDIR);
error = unlocked_access(node, VWRITE, cr);
if (error)
return (error);
}
if (mask & AT_MODE) {
/*
* If not the owner of the file then check privilege
* for two things: the privilege to set the mode at all
* and, if we're setting setuid, we also need permissions
* to add the set-uid bit, if we're not the owner.
* In the specific case of creating a set-uid root
* file, we need even more permissions.
*/
error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid);
if (error)
return (error);
error = secpolicy_setid_setsticky_clear(vp, vap, ovap, cr);
if (error)
return (error);
} else {
vap->va_mode = ovap->va_mode;
}
if (mask & (AT_UID | AT_GID)) {
error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid);
if (error)
return (error);
/*
* To change the owner of a file, or change the group of
* a file to a group of which we are not a member, the
* caller must have privilege.
*/
if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) ||
((mask & AT_GID) && vap->va_gid != ovap->va_gid &&
!groupmember(vap->va_gid, cr))) {
if (secpolicy_fs_owner(vp->v_mount, cr) != 0) {
error = spl_priv_check_cred(cr, PRIV_VFS_CHOWN);
if (error)
return (error);
}
}
if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) ||
((mask & AT_GID) && vap->va_gid != ovap->va_gid)) {
secpolicy_setid_clear(vap, vp, cr);
}
}
if (mask & (AT_ATIME | AT_MTIME)) {
/*
* From utimes(2):
* If times is NULL, ... The caller must be the owner of
* the file, have permission to write the file, or be the
* super-user.
* If times is non-NULL, ... The caller must be the owner of
* the file or be the super-user.
*/
error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid);
if (error && (vap->va_vaflags & VA_UTIMES_NULL))
error = unlocked_access(node, VWRITE, cr);
if (error)
return (error);
}
return (0);
}
int
secpolicy_vnode_create_gid(cred_t *cr)
{
return (EPERM);
}
int
secpolicy_vnode_setids_setgids(vnode_t *vp, cred_t *cr, gid_t gid)
{
if (groupmember(gid, cr))
return (0);
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return (0);
return (spl_priv_check_cred(cr, PRIV_VFS_SETGID));
}
int
secpolicy_vnode_setid_retain(vnode_t *vp, cred_t *cr,
boolean_t issuidroot __unused)
{
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return (0);
return (spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID));
}
void
secpolicy_setid_clear(struct vattr *vap, vnode_t *vp, cred_t *cr)
{
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return;
if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0) {
if (spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID)) {
vap->va_mask |= AT_MODE;
vap->va_mode &= ~(S_ISUID|S_ISGID);
}
}
}
int
secpolicy_setid_setsticky_clear(vnode_t *vp, struct vattr *vap,
const struct vattr *ovap, cred_t *cr)
{
int error;
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return (0);
/*
* Privileged processes may set the sticky bit on non-directories,
* as well as set the setgid bit on a file with a group that the process
* is not a member of. Both of these are allowed in jail(8).
*/
if (vp->v_type != VDIR && (vap->va_mode & S_ISTXT)) {
if (spl_priv_check_cred(cr, PRIV_VFS_STICKYFILE))
return (EFTYPE);
}
/*
* Check for privilege if attempting to set the
* group-id bit.
*/
if ((vap->va_mode & S_ISGID) != 0) {
error = secpolicy_vnode_setids_setgids(vp, cr, ovap->va_gid);
if (error)
return (error);
}
/*
* Deny setting setuid if we are not the file owner.
*/
if ((vap->va_mode & S_ISUID) && ovap->va_uid != cr->cr_uid) {
error = spl_priv_check_cred(cr, PRIV_VFS_ADMIN);
if (error)
return (error);
}
return (0);
}
int
secpolicy_fs_mount(cred_t *cr, vnode_t *mvp, struct mount *vfsp)
{
return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT));
}
int
secpolicy_vnode_owner(vnode_t *vp, cred_t *cr, uid_t owner)
{
if (owner == cr->cr_uid)
return (0);
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return (0);
/* XXX: vfs_suser()? */
return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT_OWNER));
}
int
secpolicy_vnode_chown(vnode_t *vp, cred_t *cr, uid_t owner)
{
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return (0);
return (spl_priv_check_cred(cr, PRIV_VFS_CHOWN));
}
void
secpolicy_fs_mount_clearopts(cred_t *cr, struct mount *vfsp)
{
if (spl_priv_check_cred(cr, PRIV_VFS_MOUNT_NONUSER) != 0) {
MNT_ILOCK(vfsp);
vfsp->vfs_flag |= VFS_NOSETUID | MNT_USER;
vfs_clearmntopt(vfsp, MNTOPT_SETUID);
vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 0);
MNT_IUNLOCK(vfsp);
}
}
/*
* Check privileges for setting xvattr attributes
*/
int
secpolicy_xvattr(vnode_t *vp, xvattr_t *xvap, uid_t owner, cred_t *cr,
vtype_t vtype)
{
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
return (0);
return (spl_priv_check_cred(cr, PRIV_VFS_SYSFLAGS));
}
int
secpolicy_smb(cred_t *cr)
{
return (spl_priv_check_cred(cr, PRIV_NETSMB));
}
+79
View File
@@ -0,0 +1,79 @@
/*
* Copyright (c) 2020 iXsystems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/list.h>
#include <sys/mutex.h>
#include <sys/procfs_list.h>
void
seq_printf(struct seq_file *m, const char *fmt, ...)
{}
void
procfs_list_install(const char *module,
const char *name,
mode_t mode,
procfs_list_t *procfs_list,
int (*show)(struct seq_file *f, void *p),
int (*show_header)(struct seq_file *f),
int (*clear)(procfs_list_t *procfs_list),
size_t procfs_list_node_off)
{
mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&procfs_list->pl_list,
procfs_list_node_off + sizeof (procfs_list_node_t),
procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
procfs_list->pl_next_id = 1;
procfs_list->pl_node_offset = procfs_list_node_off;
}
void
procfs_list_uninstall(procfs_list_t *procfs_list)
{}
void
procfs_list_destroy(procfs_list_t *procfs_list)
{
ASSERT(list_is_empty(&procfs_list->pl_list));
list_destroy(&procfs_list->pl_list);
mutex_destroy(&procfs_list->pl_lock);
}
#define NODE_ID(procfs_list, obj) \
(((procfs_list_node_t *)(((char *)obj) + \
(procfs_list)->pl_node_offset))->pln_id)
void
procfs_list_add(procfs_list_t *procfs_list, void *p)
{
ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
list_insert_tail(&procfs_list->pl_list, p);
}
+106
View File
@@ -0,0 +1,106 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* $FreeBSD$
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/param.h>
#include <sys/string.h>
#include <sys/kmem.h>
#include <machine/stdarg.h>
#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
#define IS_ALPHA(c) \
(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
char *
strpbrk(const char *s, const char *b)
{
const char *p;
do {
for (p = b; *p != '\0' && *p != *s; ++p)
;
if (*p != '\0')
return ((char *)s);
} while (*s++);
return (NULL);
}
/*
* Convert a string into a valid C identifier by replacing invalid
* characters with '_'. Also makes sure the string is nul-terminated
* and takes up at most n bytes.
*/
void
strident_canon(char *s, size_t n)
{
char c;
char *end = s + n - 1;
if ((c = *s) == 0)
return;
if (!IS_ALPHA(c) && c != '_')
*s = '_';
while (s < end && ((c = *(++s)) != 0)) {
if (!IS_ALPHA(c) && !IS_DIGIT(c) && c != '_')
*s = '_';
}
*s = 0;
}
/*
* Do not change the length of the returned string; it must be freed
* with strfree().
*/
char *
kmem_asprintf(const char *fmt, ...)
{
int size;
va_list adx;
char *buf;
va_start(adx, fmt);
size = vsnprintf(NULL, 0, fmt, adx) + 1;
va_end(adx);
buf = kmem_alloc(size, KM_SLEEP);
va_start(adx, fmt);
(void) vsnprintf(buf, size, fmt, adx);
va_end(adx);
return (buf);
}
void
kmem_strfree(char *str)
{
ASSERT(str != NULL);
kmem_free(str, strlen(str) + 1);
}
+74
View File
@@ -0,0 +1,74 @@
/*
* Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/misc.h>
#include <sys/sunddi.h>
#include <sys/sysctl.h>
int
ddi_strtol(const char *str, char **nptr, int base, long *result)
{
*result = strtol(str, nptr, base);
return (0);
}
int
ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result)
{
if (str == hw_serial) {
*result = prison0.pr_hostid;
return (0);
}
*result = strtoul(str, nptr, base);
return (0);
}
int
ddi_strtoull(const char *str, char **nptr, int base, unsigned long long *result)
{
*result = (unsigned long long)strtouq(str, nptr, base);
return (0);
}
int
ddi_strtoll(const char *str, char **nptr, int base, long long *result)
{
*result = (long long)strtoq(str, nptr, base);
return (0);
}
+259
View File
@@ -0,0 +1,259 @@
/*
* Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* Copyright (c) 2020 iXsystems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kmem.h>
#include <sys/sbuf.h>
#include <sys/nvpair.h>
#include <sys/sunddi.h>
#include <sys/sysevent.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/bus.h>
static int
log_sysevent(nvlist_t *event)
{
struct sbuf *sb;
const char *type;
char typestr[128];
nvpair_t *elem = NULL;
sb = sbuf_new_auto();
if (sb == NULL)
return (ENOMEM);
type = NULL;
while ((elem = nvlist_next_nvpair(event, elem)) != NULL) {
switch (nvpair_type(elem)) {
case DATA_TYPE_BOOLEAN:
{
boolean_t value;
(void) nvpair_value_boolean_value(elem, &value);
sbuf_printf(sb, " %s=%s", nvpair_name(elem),
value ? "true" : "false");
break;
}
case DATA_TYPE_UINT8:
{
uint8_t value;
(void) nvpair_value_uint8(elem, &value);
sbuf_printf(sb, " %s=%hhu", nvpair_name(elem), value);
break;
}
case DATA_TYPE_INT32:
{
int32_t value;
(void) nvpair_value_int32(elem, &value);
sbuf_printf(sb, " %s=%jd", nvpair_name(elem),
(intmax_t)value);
break;
}
case DATA_TYPE_UINT32:
{
uint32_t value;
(void) nvpair_value_uint32(elem, &value);
sbuf_printf(sb, " %s=%ju", nvpair_name(elem),
(uintmax_t)value);
break;
}
case DATA_TYPE_INT64:
{
int64_t value;
(void) nvpair_value_int64(elem, &value);
sbuf_printf(sb, " %s=%jd", nvpair_name(elem),
(intmax_t)value);
break;
}
case DATA_TYPE_UINT64:
{
uint64_t value;
(void) nvpair_value_uint64(elem, &value);
sbuf_printf(sb, " %s=%ju", nvpair_name(elem),
(uintmax_t)value);
break;
}
case DATA_TYPE_STRING:
{
char *value;
(void) nvpair_value_string(elem, &value);
sbuf_printf(sb, " %s=%s", nvpair_name(elem), value);
if (strcmp(FM_CLASS, nvpair_name(elem)) == 0)
type = value;
break;
}
case DATA_TYPE_UINT8_ARRAY:
{
uint8_t *value;
uint_t ii, nelem;
(void) nvpair_value_uint8_array(elem, &value, &nelem);
sbuf_printf(sb, " %s=", nvpair_name(elem));
for (ii = 0; ii < nelem; ii++)
sbuf_printf(sb, "%02hhx", value[ii]);
break;
}
case DATA_TYPE_UINT16_ARRAY:
{
uint16_t *value;
uint_t ii, nelem;
(void) nvpair_value_uint16_array(elem, &value, &nelem);
sbuf_printf(sb, " %s=", nvpair_name(elem));
for (ii = 0; ii < nelem; ii++)
sbuf_printf(sb, "%04hx", value[ii]);
break;
}
case DATA_TYPE_UINT32_ARRAY:
{
uint32_t *value;
uint_t ii, nelem;
(void) nvpair_value_uint32_array(elem, &value, &nelem);
sbuf_printf(sb, " %s=", nvpair_name(elem));
for (ii = 0; ii < nelem; ii++)
sbuf_printf(sb, "%08jx", (uintmax_t)value[ii]);
break;
}
case DATA_TYPE_INT64_ARRAY:
{
int64_t *value;
uint_t ii, nelem;
(void) nvpair_value_int64_array(elem, &value, &nelem);
sbuf_printf(sb, " %s=", nvpair_name(elem));
for (ii = 0; ii < nelem; ii++)
sbuf_printf(sb, "%016lld",
(long long)value[ii]);
break;
}
case DATA_TYPE_UINT64_ARRAY:
{
uint64_t *value;
uint_t ii, nelem;
(void) nvpair_value_uint64_array(elem, &value, &nelem);
sbuf_printf(sb, " %s=", nvpair_name(elem));
for (ii = 0; ii < nelem; ii++)
sbuf_printf(sb, "%016jx", (uintmax_t)value[ii]);
break;
}
case DATA_TYPE_STRING_ARRAY:
{
char **strarr;
uint_t ii, nelem;
(void) nvpair_value_string_array(elem, &strarr, &nelem);
for (ii = 0; ii < nelem; ii++) {
if (strarr[ii] == NULL) {
sbuf_printf(sb, " <NULL>");
continue;
}
sbuf_printf(sb, " %s", strarr[ii]);
if (strcmp(FM_CLASS, strarr[ii]) == 0)
type = strarr[ii];
}
break;
}
case DATA_TYPE_NVLIST:
/* XXX - requires recursing in log_sysevent */
break;
default:
printf("%s: type %d is not implemented\n", __func__,
nvpair_type(elem));
break;
}
}
if (sbuf_finish(sb) != 0) {
sbuf_delete(sb);
return (ENOMEM);
}
if (type == NULL)
type = "";
if (strncmp(type, "ESC_ZFS_", 8) == 0) {
snprintf(typestr, sizeof (typestr), "misc.fs.zfs.%s", type + 8);
type = typestr;
}
devctl_notify("ZFS", "ZFS", type, sbuf_data(sb));
sbuf_delete(sb);
return (0);
}
static void
sysevent_worker(void *arg __unused)
{
zfs_zevent_t *ze;
nvlist_t *event;
uint64_t dropped = 0;
uint64_t dst_size;
int error;
zfs_zevent_init(&ze);
for (;;) {
dst_size = 131072;
dropped = 0;
event = NULL;
error = zfs_zevent_next(ze, &event,
&dst_size, &dropped);
if (error) {
error = zfs_zevent_wait(ze);
if (error == ESHUTDOWN)
break;
} else {
VERIFY(event != NULL);
log_sysevent(event);
nvlist_free(event);
}
}
zfs_zevent_destroy(ze);
kthread_exit();
}
void
ddi_sysevent_init(void)
{
kproc_kthread_add(sysevent_worker, NULL, &zfsproc, NULL, 0, 0,
"zfskern", "sysevent");
}
+329
View File
@@ -0,0 +1,329 @@
/*
* Copyright (c) 2009 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Copyright (c) 2012 Spectra Logic Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/taskq.h>
#include <sys/zfs_context.h>
#include <vm/uma.h>
static uint_t taskq_tsd;
static uma_zone_t taskq_zone;
taskq_t *system_taskq = NULL;
taskq_t *system_delay_taskq = NULL;
taskq_t *dynamic_taskq = NULL;
extern int uma_align_cache;
#define TQ_MASK uma_align_cache
#define TQ_PTR_MASK ~uma_align_cache
#define TIMEOUT_TASK 1
#define NORMAL_TASK 2
static int
taskqent_init(void *mem, int size, int flags)
{
bzero(mem, sizeof (taskq_ent_t));
return (0);
}
static int
taskqent_ctor(void *mem, int size, void *arg, int flags)
{
return (0);
}
static void
taskqent_dtor(void *mem, int size, void *arg)
{
taskq_ent_t *ent = mem;
ent->tqent_gen = (ent->tqent_gen + 1) & TQ_MASK;
}
static void
system_taskq_init(void *arg)
{
tsd_create(&taskq_tsd, NULL);
taskq_zone = uma_zcreate("taskq_zone", sizeof (taskq_ent_t),
taskqent_ctor, taskqent_dtor, taskqent_init, NULL,
UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
system_taskq = taskq_create("system_taskq", mp_ncpus, minclsyspri,
0, 0, 0);
system_delay_taskq = taskq_create("system_delay_taskq", mp_ncpus,
minclsyspri, 0, 0, 0);
}
SYSINIT(system_taskq_init, SI_SUB_CONFIGURE, SI_ORDER_ANY, system_taskq_init,
NULL);
static void
system_taskq_fini(void *arg)
{
taskq_destroy(system_taskq);
uma_zdestroy(taskq_zone);
tsd_destroy(&taskq_tsd);
}
SYSUNINIT(system_taskq_fini, SI_SUB_CONFIGURE, SI_ORDER_ANY, system_taskq_fini,
NULL);
static void
taskq_tsd_set(void *context)
{
taskq_t *tq = context;
tsd_set(taskq_tsd, tq);
}
static taskq_t *
taskq_create_with_init(const char *name, int nthreads, pri_t pri,
int minalloc __unused, int maxalloc __unused, uint_t flags)
{
taskq_t *tq;
if ((flags & TASKQ_THREADS_CPU_PCT) != 0)
nthreads = MAX((mp_ncpus * nthreads) / 100, 1);
tq = kmem_alloc(sizeof (*tq), KM_SLEEP);
tq->tq_queue = taskqueue_create(name, M_WAITOK,
taskqueue_thread_enqueue, &tq->tq_queue);
taskqueue_set_callback(tq->tq_queue, TASKQUEUE_CALLBACK_TYPE_INIT,
taskq_tsd_set, tq);
taskqueue_set_callback(tq->tq_queue, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN,
taskq_tsd_set, NULL);
(void) taskqueue_start_threads(&tq->tq_queue, nthreads, pri,
"%s", name);
return ((taskq_t *)tq);
}
taskq_t *
taskq_create(const char *name, int nthreads, pri_t pri, int minalloc __unused,
int maxalloc __unused, uint_t flags)
{
return (taskq_create_with_init(name, nthreads, pri, minalloc, maxalloc,
flags));
}
taskq_t *
taskq_create_proc(const char *name, int nthreads, pri_t pri, int minalloc,
int maxalloc, proc_t *proc __unused, uint_t flags)
{
return (taskq_create_with_init(name, nthreads, pri, minalloc, maxalloc,
flags));
}
void
taskq_destroy(taskq_t *tq)
{
taskqueue_free(tq->tq_queue);
kmem_free(tq, sizeof (*tq));
}
int
taskq_member(taskq_t *tq, kthread_t *thread)
{
return (taskqueue_member(tq->tq_queue, thread));
}
taskq_t *
taskq_of_curthread(void)
{
return (tsd_get(taskq_tsd));
}
int
taskq_cancel_id(taskq_t *tq, taskqid_t tid)
{
uint32_t pend;
int rc;
taskq_ent_t *ent = (void*)(tid & TQ_PTR_MASK);
if (ent == NULL)
return (0);
if ((tid & TQ_MASK) != ent->tqent_gen)
return (0);
if (ent->tqent_type == TIMEOUT_TASK) {
rc = taskqueue_cancel_timeout(tq->tq_queue,
&ent->tqent_timeout_task, &pend);
} else
rc = taskqueue_cancel(tq->tq_queue, &ent->tqent_task, &pend);
if (rc == EBUSY)
taskq_wait_id(tq, tid);
else
uma_zfree(taskq_zone, ent);
return (rc);
}
static void
taskq_run(void *arg, int pending __unused)
{
taskq_ent_t *task = arg;
task->tqent_func(task->tqent_arg);
uma_zfree(taskq_zone, task);
}
taskqid_t
taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
uint_t flags, clock_t expire_time)
{
taskq_ent_t *task;
taskqid_t tid;
clock_t timo;
int mflag;
timo = expire_time - ddi_get_lbolt();
if (timo <= 0)
return (taskq_dispatch(tq, func, arg, flags));
if ((flags & (TQ_SLEEP | TQ_NOQUEUE)) == TQ_SLEEP)
mflag = M_WAITOK;
else
mflag = M_NOWAIT;
task = uma_zalloc(taskq_zone, mflag);
if (task == NULL)
return (0);
tid = (uintptr_t)task;
MPASS((tid & TQ_MASK) == 0);
task->tqent_func = func;
task->tqent_arg = arg;
task->tqent_type = TIMEOUT_TASK;
tid |= task->tqent_gen;
TIMEOUT_TASK_INIT(tq->tq_queue, &task->tqent_timeout_task, 0,
taskq_run, task);
taskqueue_enqueue_timeout(tq->tq_queue, &task->tqent_timeout_task,
timo);
return (tid);
}
taskqid_t
taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
{
taskq_ent_t *task;
int mflag, prio;
taskqid_t tid;
if ((flags & (TQ_SLEEP | TQ_NOQUEUE)) == TQ_SLEEP)
mflag = M_WAITOK;
else
mflag = M_NOWAIT;
/*
* If TQ_FRONT is given, we want higher priority for this task, so it
* can go at the front of the queue.
*/
prio = !!(flags & TQ_FRONT);
task = uma_zalloc(taskq_zone, mflag);
if (task == NULL)
return (0);
tid = (uintptr_t)task;
MPASS((tid & TQ_MASK) == 0);
task->tqent_func = func;
task->tqent_arg = arg;
task->tqent_type = NORMAL_TASK;
TASK_INIT(&task->tqent_task, prio, taskq_run, task);
tid |= task->tqent_gen;
taskqueue_enqueue(tq->tq_queue, &task->tqent_task);
return (tid);
}
static void
taskq_run_ent(void *arg, int pending __unused)
{
taskq_ent_t *task = arg;
task->tqent_func(task->tqent_arg);
}
void
taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint32_t flags,
taskq_ent_t *task)
{
int prio;
/*
* If TQ_FRONT is given, we want higher priority for this task, so it
* can go at the front of the queue.
*/
prio = !!(flags & TQ_FRONT);
task->tqent_func = func;
task->tqent_arg = arg;
TASK_INIT(&task->tqent_task, prio, taskq_run_ent, task);
taskqueue_enqueue(tq->tq_queue, &task->tqent_task);
}
void
taskq_wait(taskq_t *tq)
{
taskqueue_quiesce(tq->tq_queue);
}
void
taskq_wait_id(taskq_t *tq, taskqid_t tid)
{
taskq_ent_t *ent = (void*)(tid & TQ_PTR_MASK);
if ((tid & TQ_MASK) != ent->tqent_gen)
return;
taskqueue_drain(tq->tq_queue, &ent->tqent_task);
}
void
taskq_wait_outstanding(taskq_t *tq, taskqid_t id __unused)
{
taskqueue_drain_all(tq->tq_queue);
}
int
taskq_empty_ent(taskq_ent_t *t)
{
return (t->tqent_task.ta_pending == 0);
}
+92
View File
@@ -0,0 +1,92 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
/*
* $FreeBSD$
*/
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/vnode.h>
/*
* same as uiomove() but doesn't modify uio structure.
* return in cbytes how many bytes were copied.
*/
int
uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
{
struct iovec small_iovec[1];
struct uio small_uio_clone;
struct uio *uio_clone;
int error;
ASSERT3U(uio->uio_rw, ==, rw);
if (uio->uio_iovcnt == 1) {
small_uio_clone = *uio;
small_iovec[0] = *uio->uio_iov;
small_uio_clone.uio_iov = small_iovec;
uio_clone = &small_uio_clone;
} else {
uio_clone = cloneuio(uio);
}
error = vn_io_fault_uiomove(p, n, uio_clone);
*cbytes = uio->uio_resid - uio_clone->uio_resid;
if (uio_clone != &small_uio_clone)
free(uio_clone, M_IOV);
return (error);
}
/*
* Drop the next n chars out of *uiop.
*/
void
uioskip(uio_t *uio, size_t n)
{
enum uio_seg segflg;
/* For the full compatibility with illumos. */
if (n > uio->uio_resid)
return;
segflg = uio->uio_segflg;
uio->uio_segflg = UIO_NOCOPY;
uiomove(NULL, n, uio->uio_rw, uio);
uio->uio_segflg = segflg;
}
+278
View File
@@ -0,0 +1,278 @@
/*
* Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/priv.h>
#include <sys/libkern.h>
#include <sys/mutex.h>
#include <sys/vnode.h>
MALLOC_DECLARE(M_MOUNT);
void
vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
int flags __unused)
{
struct vfsopt *opt;
size_t namesize;
int locked;
if (!(locked = mtx_owned(MNT_MTX(vfsp))))
MNT_ILOCK(vfsp);
if (vfsp->mnt_opt == NULL) {
void *opts;
MNT_IUNLOCK(vfsp);
opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
MNT_ILOCK(vfsp);
if (vfsp->mnt_opt == NULL) {
vfsp->mnt_opt = opts;
TAILQ_INIT(vfsp->mnt_opt);
} else {
free(opts, M_MOUNT);
}
}
MNT_IUNLOCK(vfsp);
opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK);
namesize = strlen(name) + 1;
opt->name = malloc(namesize, M_MOUNT, M_WAITOK);
strlcpy(opt->name, name, namesize);
opt->pos = -1;
opt->seen = 1;
if (arg == NULL) {
opt->value = NULL;
opt->len = 0;
} else {
opt->len = strlen(arg) + 1;
opt->value = malloc(opt->len, M_MOUNT, M_WAITOK);
bcopy(arg, opt->value, opt->len);
}
MNT_ILOCK(vfsp);
TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link);
if (!locked)
MNT_IUNLOCK(vfsp);
}
void
vfs_clearmntopt(vfs_t *vfsp, const char *name)
{
int locked;
if (!(locked = mtx_owned(MNT_MTX(vfsp))))
MNT_ILOCK(vfsp);
vfs_deleteopt(vfsp->mnt_opt, name);
if (!locked)
MNT_IUNLOCK(vfsp);
}
int
vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
{
struct vfsoptlist *opts = vfsp->mnt_optnew;
int error;
if (opts == NULL)
return (0);
error = vfs_getopt(opts, opt, (void **)argp, NULL);
return (error != 0 ? 0 : 1);
}
int
mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
char *fspec, int fsflags)
{
struct vfsconf *vfsp;
struct mount *mp;
vnode_t *vp, *mvp;
struct ucred *cr;
int error;
ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");
vp = *vpp;
*vpp = NULL;
error = 0;
/*
* Be ultra-paranoid about making sure the type and fspath
* variables will fit in our mp buffers, including the
* terminating NUL.
*/
if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
error = ENAMETOOLONG;
if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
error = ENODEV;
if (error == 0 && vp->v_type != VDIR)
error = ENOTDIR;
/*
* We need vnode lock to protect v_mountedhere and vnode interlock
* to protect v_iflag.
*/
if (error == 0) {
VI_LOCK(vp);
if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
vp->v_iflag |= VI_MOUNT;
else
error = EBUSY;
VI_UNLOCK(vp);
}
if (error != 0) {
vput(vp);
return (error);
}
VOP_UNLOCK1(vp);
/*
* Allocate and initialize the filesystem.
* We don't want regular user that triggered snapshot mount to be able
* to unmount it, so pass credentials of the parent mount.
*/
mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred);
mp->mnt_optnew = NULL;
vfs_setmntopt(mp, "from", fspec, 0);
mp->mnt_optnew = mp->mnt_opt;
mp->mnt_opt = NULL;
/*
* Set the mount level flags.
*/
mp->mnt_flag = fsflags & MNT_UPDATEMASK;
/*
* Snapshots are always read-only.
*/
mp->mnt_flag |= MNT_RDONLY;
/*
* We don't want snapshots to allow access to vulnerable setuid
* programs, so we turn off setuid when mounting snapshots.
*/
mp->mnt_flag |= MNT_NOSUID;
/*
* We don't want snapshots to be visible in regular
* mount(8) and df(1) output.
*/
mp->mnt_flag |= MNT_IGNORE;
/*
* XXX: This is evil, but we can't mount a snapshot as a regular user.
* XXX: Is is safe when snapshot is mounted from within a jail?
*/
cr = td->td_ucred;
td->td_ucred = kcred;
error = VFS_MOUNT(mp);
td->td_ucred = cr;
if (error != 0) {
/*
* Clear VI_MOUNT and decrement the use count "atomically",
* under the vnode lock. This is not strictly required,
* but makes it easier to reason about the life-cycle and
* ownership of the covered vnode.
*/
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
VI_LOCK(vp);
vp->v_iflag &= ~VI_MOUNT;
VI_UNLOCK(vp);
vput(vp);
vfs_unbusy(mp);
vfs_freeopts(mp->mnt_optnew);
mp->mnt_vnodecovered = NULL;
vfs_mount_destroy(mp);
return (error);
}
if (mp->mnt_opt != NULL)
vfs_freeopts(mp->mnt_opt);
mp->mnt_opt = mp->mnt_optnew;
(void) VFS_STATFS(mp, &mp->mnt_stat);
/*
* Prevent external consumers of mount options from reading
* mnt_optnew.
*/
mp->mnt_optnew = NULL;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef FREEBSD_NAMECACHE
cache_purge(vp);
#endif
VI_LOCK(vp);
vp->v_iflag &= ~VI_MOUNT;
VI_UNLOCK(vp);
vp->v_mountedhere = mp;
/* Put the new filesystem on the mount list. */
mtx_lock(&mountlist_mtx);
TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
mtx_unlock(&mountlist_mtx);
vfs_event_signal(NULL, VQ_MOUNT, 0);
if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
panic("mount: lost mount");
VOP_UNLOCK1(vp);
#if __FreeBSD_version >= 1300048
vfs_op_exit(mp);
#endif
vfs_unbusy(mp);
*vpp = mvp;
return (0);
}
/*
* Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
* asynchronously using a taskq. This can avoid deadlocks caused by re-entering
* the file system as a result of releasing the vnode. Note, file systems
* already have to handle the race where the vnode is incremented before the
* inactive routine is called and does its locking.
*
* Warning: Excessive use of this routine can lead to performance problems.
* This is because taskqs throttle back allocation if too many are created.
*/
void
vn_rele_async(vnode_t *vp, taskq_t *taskq)
{
VERIFY(vp->v_count > 0);
if (refcount_release_if_not_last(&vp->v_usecount)) {
#if __FreeBSD_version < 1300045
vdrop(vp);
#endif
return;
}
VERIFY(taskq_dispatch((taskq_t *)taskq,
(task_func_t *)vrele, vp, TQ_SLEEP) != 0);
}
+71
View File
@@ -0,0 +1,71 @@
/*
* Copyright (c) 2013 EMC Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/byteorder.h>
#include <sys/lock.h>
#include <sys/freebsd_rwlock.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
const int zfs_vm_pagerret_bad = VM_PAGER_BAD;
const int zfs_vm_pagerret_error = VM_PAGER_ERROR;
const int zfs_vm_pagerret_ok = VM_PAGER_OK;
const int zfs_vm_pagerput_sync = VM_PAGER_PUT_SYNC;
const int zfs_vm_pagerput_inval = VM_PAGER_PUT_INVAL;
void
zfs_vmobject_assert_wlocked(vm_object_t object)
{
/*
* This is not ideal because FILE/LINE used by assertions will not
* be too helpful, but it must be an hard function for
* compatibility reasons.
*/
VM_OBJECT_ASSERT_WLOCKED(object);
}
void
zfs_vmobject_wlock(vm_object_t object)
{
VM_OBJECT_WLOCK(object);
}
void
zfs_vmobject_wunlock(vm_object_t object)
{
VM_OBJECT_WUNLOCK(object);
}
+268
View File
@@ -0,0 +1,268 @@
/*
* Copyright (c) 2020 iXsystems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/zmod.h>
#if __FreeBSD_version >= 1300041
#include <contrib/zlib/zlib.h>
#else
#include <sys/zlib.h>
#endif
#include <sys/kobj.h>
/*ARGSUSED*/
static void *
zcalloc(void *opaque, uint_t items, uint_t size)
{
return (malloc((size_t)items*size, M_SOLARIS, M_NOWAIT));
}
/*ARGSUSED*/
static void
zcfree(void *opaque, void *ptr)
{
free(ptr, M_SOLARIS);
}
static int
zlib_deflateInit(z_stream *stream, int level)
{
stream->zalloc = zcalloc;
stream->opaque = NULL;
stream->zfree = zcfree;
return (deflateInit(stream, level));
}
static int
zlib_deflate(z_stream *stream, int flush)
{
return (deflate(stream, flush));
}
static int
zlib_deflateEnd(z_stream *stream)
{
return (deflateEnd(stream));
}
static int
zlib_inflateInit(z_stream *stream)
{
stream->zalloc = zcalloc;
stream->opaque = NULL;
stream->zfree = zcfree;
return (inflateInit(stream));
}
static int
zlib_inflate(z_stream *stream, int finish)
{
#if __FreeBSD_version >= 1300024
return (inflate(stream, finish));
#else
return (_zlib104_inflate(stream, finish));
#endif
}
static int
zlib_inflateEnd(z_stream *stream)
{
return (inflateInit(stream));
}
/*
* A kmem_cache is used for the zlib workspaces to avoid having to vmalloc
* and vfree for every call. Using a kmem_cache also has the advantage
* that improves the odds that the memory used will be local to this cpu.
* To further improve things it might be wise to create a dedicated per-cpu
* workspace for use. This would take some additional care because we then
* must disable preemption around the critical section, and verify that
* zlib_deflate* and zlib_inflate* never internally call schedule().
*/
static void *
zlib_workspace_alloc(int flags)
{
// return (kmem_cache_alloc(zlib_workspace_cache, flags));
return (NULL);
}
static void
zlib_workspace_free(void *workspace)
{
// kmem_cache_free(zlib_workspace_cache, workspace);
}
/*
* Compresses the source buffer into the destination buffer. The level
* parameter has the same meaning as in deflateInit. sourceLen is the byte
* length of the source buffer. Upon entry, destLen is the total size of the
* destination buffer, which must be at least 0.1% larger than sourceLen plus
* 12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
*
* compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
* memory, Z_BUF_ERROR if there was not enough room in the output buffer,
* Z_STREAM_ERROR if the level parameter is invalid.
*/
int
z_compress_level(void *dest, size_t *destLen, const void *source,
size_t sourceLen, int level)
{
z_stream stream;
int err;
bzero(&stream, sizeof (stream));
stream.next_in = (Byte *)source;
stream.avail_in = (uInt)sourceLen;
stream.next_out = dest;
stream.avail_out = (uInt)*destLen;
stream.opaque = NULL;
if ((size_t)stream.avail_out != *destLen)
return (Z_BUF_ERROR);
stream.opaque = zlib_workspace_alloc(KM_SLEEP);
#if 0
if (!stream.opaque)
return (Z_MEM_ERROR);
#endif
err = zlib_deflateInit(&stream, level);
if (err != Z_OK) {
zlib_workspace_free(stream.opaque);
return (err);
}
err = zlib_deflate(&stream, Z_FINISH);
if (err != Z_STREAM_END) {
zlib_deflateEnd(&stream);
zlib_workspace_free(stream.opaque);
return (err == Z_OK ? Z_BUF_ERROR : err);
}
*destLen = stream.total_out;
err = zlib_deflateEnd(&stream);
zlib_workspace_free(stream.opaque);
return (err);
}
/*
* Decompresses the source buffer into the destination buffer. sourceLen is
* the byte length of the source buffer. Upon entry, destLen is the total
* size of the destination buffer, which must be large enough to hold the
* entire uncompressed data. (The size of the uncompressed data must have
* been saved previously by the compressor and transmitted to the decompressor
* by some mechanism outside the scope of this compression library.)
* Upon exit, destLen is the actual size of the compressed buffer.
* This function can be used to decompress a whole file at once if the
* input file is mmap'ed.
*
* uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
* enough memory, Z_BUF_ERROR if there was not enough room in the output
* buffer, or Z_DATA_ERROR if the input data was corrupted.
*/
int
z_uncompress(void *dest, size_t *destLen, const void *source, size_t sourceLen)
{
z_stream stream;
int err;
bzero(&stream, sizeof (stream));
stream.next_in = (Byte *)source;
stream.avail_in = (uInt)sourceLen;
stream.next_out = dest;
stream.avail_out = (uInt)*destLen;
if ((size_t)stream.avail_out != *destLen)
return (Z_BUF_ERROR);
stream.opaque = zlib_workspace_alloc(KM_SLEEP);
#if 0
if (!stream.opaque)
return (Z_MEM_ERROR);
#endif
err = zlib_inflateInit(&stream);
if (err != Z_OK) {
zlib_workspace_free(stream.opaque);
return (err);
}
err = zlib_inflate(&stream, Z_FINISH);
if (err != Z_STREAM_END) {
zlib_inflateEnd(&stream);
zlib_workspace_free(stream.opaque);
if (err == Z_NEED_DICT ||
(err == Z_BUF_ERROR && stream.avail_in == 0))
return (Z_DATA_ERROR);
return (err);
}
*destLen = stream.total_out;
err = zlib_inflateEnd(&stream);
zlib_workspace_free(stream.opaque);
return (err);
}
#if 0
int
spl_zlib_init(void)
{
int size;
size = MAX(spl_zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
zlib_inflate_workspacesize());
zlib_workspace_cache = kmem_cache_create(
"spl_zlib_workspace_cache",
size, 0, NULL, NULL, NULL, NULL, NULL,
KMC_VMEM | KMC_NOEMERGENCY);
if (!zlib_workspace_cache)
return (1);
return (0);
}
void
spl_zlib_fini(void)
{
kmem_cache_destroy(zlib_workspace_cache);
zlib_workspace_cache = NULL;
}
#endif
+265
View File
@@ -0,0 +1,265 @@
/*
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/jail.h>
#include <sys/osd.h>
#include <sys/priv.h>
#include <sys/zone.h>
#include <sys/policy.h>
static MALLOC_DEFINE(M_ZONES, "zones_data", "Zones data");
/*
* Structure to record list of ZFS datasets exported to a zone.
*/
typedef struct zone_dataset {
LIST_ENTRY(zone_dataset) zd_next;
char zd_dataset[0];
} zone_dataset_t;
LIST_HEAD(zone_dataset_head, zone_dataset);
static int zone_slot;
int
zone_dataset_attach(struct ucred *cred, const char *dataset, int jailid)
{
struct zone_dataset_head *head;
zone_dataset_t *zd, *zd2;
struct prison *pr;
int dofree, error;
if ((error = spl_priv_check_cred(cred, PRIV_ZFS_JAIL)) != 0)
return (error);
/* Allocate memory before we grab prison's mutex. */
zd = malloc(sizeof (*zd) + strlen(dataset) + 1, M_ZONES, M_WAITOK);
sx_slock(&allprison_lock);
pr = prison_find(jailid); /* Locks &pr->pr_mtx. */
sx_sunlock(&allprison_lock);
if (pr == NULL) {
free(zd, M_ZONES);
return (ENOENT);
}
head = osd_jail_get(pr, zone_slot);
if (head != NULL) {
dofree = 0;
LIST_FOREACH(zd2, head, zd_next) {
if (strcmp(dataset, zd2->zd_dataset) == 0) {
free(zd, M_ZONES);
error = EEXIST;
goto end;
}
}
} else {
dofree = 1;
prison_hold_locked(pr);
mtx_unlock(&pr->pr_mtx);
head = malloc(sizeof (*head), M_ZONES, M_WAITOK);
LIST_INIT(head);
mtx_lock(&pr->pr_mtx);
error = osd_jail_set(pr, zone_slot, head);
KASSERT(error == 0, ("osd_jail_set() failed (error=%d)",
error));
}
strcpy(zd->zd_dataset, dataset);
LIST_INSERT_HEAD(head, zd, zd_next);
end:
if (dofree)
prison_free_locked(pr);
else
mtx_unlock(&pr->pr_mtx);
return (error);
}
int
zone_dataset_detach(struct ucred *cred, const char *dataset, int jailid)
{
struct zone_dataset_head *head;
zone_dataset_t *zd;
struct prison *pr;
int error;
if ((error = spl_priv_check_cred(cred, PRIV_ZFS_JAIL)) != 0)
return (error);
sx_slock(&allprison_lock);
pr = prison_find(jailid);
sx_sunlock(&allprison_lock);
if (pr == NULL)
return (ENOENT);
head = osd_jail_get(pr, zone_slot);
if (head == NULL) {
error = ENOENT;
goto end;
}
LIST_FOREACH(zd, head, zd_next) {
if (strcmp(dataset, zd->zd_dataset) == 0)
break;
}
if (zd == NULL)
error = ENOENT;
else {
LIST_REMOVE(zd, zd_next);
free(zd, M_ZONES);
if (LIST_EMPTY(head))
osd_jail_del(pr, zone_slot);
error = 0;
}
end:
mtx_unlock(&pr->pr_mtx);
return (error);
}
/*
* Returns true if the named dataset is visible in the current zone.
* The 'write' parameter is set to 1 if the dataset is also writable.
*/
int
zone_dataset_visible(const char *dataset, int *write)
{
struct zone_dataset_head *head;
zone_dataset_t *zd;
struct prison *pr;
size_t len;
int ret = 0;
if (dataset[0] == '\0')
return (0);
if (INGLOBALZONE(curproc)) {
if (write != NULL)
*write = 1;
return (1);
}
pr = curthread->td_ucred->cr_prison;
mtx_lock(&pr->pr_mtx);
head = osd_jail_get(pr, zone_slot);
if (head == NULL)
goto end;
/*
* Walk the list once, looking for datasets which match exactly, or
* specify a dataset underneath an exported dataset. If found, return
* true and note that it is writable.
*/
LIST_FOREACH(zd, head, zd_next) {
len = strlen(zd->zd_dataset);
if (strlen(dataset) >= len &&
bcmp(dataset, zd->zd_dataset, len) == 0 &&
(dataset[len] == '\0' || dataset[len] == '/' ||
dataset[len] == '@')) {
if (write)
*write = 1;
ret = 1;
goto end;
}
}
/*
* Walk the list a second time, searching for datasets which are parents
* of exported datasets. These should be visible, but read-only.
*
* Note that we also have to support forms such as 'pool/dataset/', with
* a trailing slash.
*/
LIST_FOREACH(zd, head, zd_next) {
len = strlen(dataset);
if (dataset[len - 1] == '/')
len--; /* Ignore trailing slash */
if (len < strlen(zd->zd_dataset) &&
bcmp(dataset, zd->zd_dataset, len) == 0 &&
zd->zd_dataset[len] == '/') {
if (write)
*write = 0;
ret = 1;
goto end;
}
}
end:
mtx_unlock(&pr->pr_mtx);
return (ret);
}
static void
zone_destroy(void *arg)
{
struct zone_dataset_head *head;
zone_dataset_t *zd;
head = arg;
while ((zd = LIST_FIRST(head)) != NULL) {
LIST_REMOVE(zd, zd_next);
free(zd, M_ZONES);
}
free(head, M_ZONES);
}
uint32_t
zone_get_hostid(void *ptr)
{
KASSERT(ptr == NULL, ("only NULL pointer supported in %s", __func__));
return ((uint32_t)curthread->td_ucred->cr_prison->pr_hostid);
}
boolean_t
in_globalzone(struct proc *p)
{
return (!jailed(FIRST_THREAD_IN_PROC((p))->td_ucred));
}
static void
zone_sysinit(void *arg __unused)
{
zone_slot = osd_jail_register(zone_destroy, NULL);
}
static void
zone_sysuninit(void *arg __unused)
{
osd_jail_deregister(zone_slot);
}
SYSINIT(zone_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY, zone_sysinit, NULL);
SYSUNINIT(zone_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY, zone_sysuninit, NULL);
File diff suppressed because it is too large Load Diff
+245
View File
@@ -0,0 +1,245 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/spa_impl.h>
#include <sys/zio_compress.h>
#include <sys/zio_checksum.h>
#include <sys/zfs_context.h>
#include <sys/arc.h>
#include <sys/refcount.h>
#include <sys/vdev.h>
#include <sys/vdev_trim.h>
#include <sys/vdev_impl.h>
#include <sys/dsl_pool.h>
#include <sys/zio_checksum.h>
#include <sys/multilist.h>
#include <sys/abd.h>
#include <sys/zil.h>
#include <sys/fm/fs/zfs.h>
#include <sys/eventhandler.h>
#include <sys/callb.h>
#include <sys/kstat.h>
#include <sys/zthr.h>
#include <zfs_fletcher.h>
#include <sys/arc_impl.h>
#include <sys/sdt.h>
#include <sys/aggsum.h>
#include <cityhash.h>
extern struct vfsops zfs_vfsops;
/* vmem_size typemask */
#define VMEM_ALLOC 0x01
#define VMEM_FREE 0x02
#define VMEM_MAXFREE 0x10
typedef size_t vmem_size_t;
extern vmem_size_t vmem_size(vmem_t *vm, int typemask);
uint_t zfs_arc_free_target = 0;
int64_t last_free_memory;
free_memory_reason_t last_free_reason;
int64_t
arc_available_memory(void)
{
int64_t lowest = INT64_MAX;
int64_t n __unused;
free_memory_reason_t r = FMR_UNKNOWN;
#ifdef _KERNEL
/*
* Cooperate with pagedaemon when it's time for it to scan
* and reclaim some pages.
*/
n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target);
if (n < lowest) {
lowest = n;
r = FMR_LOTSFREE;
}
#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
/*
* If we're on an i386 platform, it's possible that we'll exhaust the
* kernel heap space before we ever run out of available physical
* memory. Most checks of the size of the heap_area compare against
* tune.t_minarmem, which is the minimum available real memory that we
* can have in the system. However, this is generally fixed at 25 pages
* which is so low that it's useless. In this comparison, we seek to
* calculate the total heap-size, and reclaim if more than 3/4ths of the
* heap is allocated. (Or, in the calculation, if less than 1/4th is
* free)
*/
n = uma_avail() - (long)(uma_limit() / 4);
if (n < lowest) {
lowest = n;
r = FMR_HEAP_ARENA;
}
#endif
/*
* If zio data pages are being allocated out of a separate heap segment,
* then enforce that the size of available vmem for this arena remains
* above about 1/4th (1/(2^arc_zio_arena_free_shift)) free.
*
* Note that reducing the arc_zio_arena_free_shift keeps more virtual
* memory (in the zio_arena) free, which can avoid memory
* fragmentation issues.
*/
if (zio_arena != NULL) {
n = (int64_t)vmem_size(zio_arena, VMEM_FREE) -
(vmem_size(zio_arena, VMEM_ALLOC) >>
arc_zio_arena_free_shift);
if (n < lowest) {
lowest = n;
r = FMR_ZIO_ARENA;
}
}
#else /* _KERNEL */
/* Every 100 calls, free a small amount */
if (spa_get_random(100) == 0)
lowest = -1024;
#endif /* _KERNEL */
last_free_memory = lowest;
last_free_reason = r;
DTRACE_PROBE2(arc__available_memory, int64_t, lowest, int, r);
return (lowest);
}
/*
* Return a default max arc size based on the amount of physical memory.
*/
uint64_t
arc_default_max(uint64_t min, uint64_t allmem)
{
uint64_t size;
if (allmem >= 1 << 30)
size = allmem - (1 << 30);
else
size = min;
return (MAX(allmem * 5 / 8, size));
}
/*
* Helper function for arc_prune_async() it is responsible for safely
* handling the execution of a registered arc_prune_func_t.
*/
static void
arc_prune_task(void *arg)
{
int64_t nr_scan = *(int64_t *)arg;
arc_reduce_target_size(ptob(nr_scan));
free(arg, M_TEMP);
vnlru_free(nr_scan, &zfs_vfsops);
}
/*
* Notify registered consumers they must drop holds on a portion of the ARC
* buffered they reference. This provides a mechanism to ensure the ARC can
* honor the arc_meta_limit and reclaim otherwise pinned ARC buffers. This
* is analogous to dnlc_reduce_cache() but more generic.
*
* This operation is performed asynchronously so it may be safely called
* in the context of the arc_reclaim_thread(). A reference is taken here
* for each registered arc_prune_t and the arc_prune_task() is responsible
* for releasing it once the registered arc_prune_func_t has completed.
*/
void
arc_prune_async(int64_t adjust)
{
int64_t *adjustptr;
if ((adjustptr = malloc(sizeof (int64_t), M_TEMP, M_NOWAIT)) == NULL)
return;
*adjustptr = adjust;
taskq_dispatch(arc_prune_taskq, arc_prune_task, adjustptr, TQ_SLEEP);
ARCSTAT_BUMP(arcstat_prune);
}
uint64_t
arc_all_memory(void)
{
return ((uint64_t)ptob(physmem));
}
int
arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
{
return (0);
}
uint64_t
arc_free_memory(void)
{
/* XXX */
return (0);
}
static eventhandler_tag arc_event_lowmem = NULL;
static void
arc_lowmem(void *arg __unused, int howto __unused)
{
int64_t free_memory, to_free;
arc_no_grow = B_TRUE;
arc_warm = B_TRUE;
arc_growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
free_memory = arc_available_memory();
to_free = (arc_c >> arc_shrink_shift) - MIN(free_memory, 0);
DTRACE_PROBE2(arc__needfree, int64_t, free_memory, int64_t, to_free);
arc_reduce_target_size(to_free);
mutex_enter(&arc_adjust_lock);
arc_adjust_needed = B_TRUE;
zthr_wakeup(arc_adjust_zthr);
/*
* It is unsafe to block here in arbitrary threads, because we can come
* here from ARC itself and may hold ARC locks and thus risk a deadlock
* with ARC reclaim thread.
*/
if (curproc == pageproc)
(void) cv_wait(&arc_adjust_waiters_cv, &arc_adjust_lock);
mutex_exit(&arc_adjust_lock);
}
void
arc_lowmem_init(void)
{
arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
EVENTHANDLER_PRI_FIRST);
}
void
arc_lowmem_fini(void)
{
if (arc_event_lowmem != NULL)
EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
}
+613
View File
@@ -0,0 +1,613 @@
/*
* Copyright (c) 2005-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* Copyright (c) 2018 Sean Eric Fagan <sef@ixsystems.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Portions of this file are derived from sys/geom/eli/g_eli_hmac.c
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/errno.h>
#ifdef _KERNEL
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <opencrypto/cryptodev.h>
#include <opencrypto/xform.h>
#else
#include <strings.h>
#endif
#include <sys/zio_crypt.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <sys/freebsd_crypto.h>
#define SHA512_HMAC_BLOCK_SIZE 128
static int crypt_sessions = 0;
SYSCTL_DECL(_vfs_zfs);
SYSCTL_INT(_vfs_zfs, OID_AUTO, crypt_sessions, CTLFLAG_RD,
&crypt_sessions, 0, "Number of cryptographic sessions created");
void
crypto_mac_init(struct hmac_ctx *ctx, const crypto_key_t *c_key)
{
uint8_t k_ipad[SHA512_HMAC_BLOCK_SIZE],
k_opad[SHA512_HMAC_BLOCK_SIZE],
key[SHA512_HMAC_BLOCK_SIZE];
SHA512_CTX lctx;
int i;
size_t cl_bytes = CRYPTO_BITS2BYTES(c_key->ck_length);
/*
* This code is based on the similar code in geom/eli/g_eli_hmac.c
*/
explicit_bzero(key, sizeof (key));
if (c_key->ck_length == 0)
/* do nothing */;
else if (cl_bytes <= SHA512_HMAC_BLOCK_SIZE)
bcopy(c_key->ck_data, key, cl_bytes);
else {
/*
* If key is longer than 128 bytes reset it to
* key = SHA512(key).
*/
SHA512_Init(&lctx);
SHA512_Update(&lctx, c_key->ck_data, cl_bytes);
SHA512_Final(key, &lctx);
}
/* XOR key with ipad and opad values. */
for (i = 0; i < sizeof (key); i++) {
k_ipad[i] = key[i] ^ 0x36;
k_opad[i] = key[i] ^ 0x5c;
}
explicit_bzero(key, sizeof (key));
/* Start inner SHA512. */
SHA512_Init(&ctx->innerctx);
SHA512_Update(&ctx->innerctx, k_ipad, sizeof (k_ipad));
explicit_bzero(k_ipad, sizeof (k_ipad));
/* Start outer SHA512. */
SHA512_Init(&ctx->outerctx);
SHA512_Update(&ctx->outerctx, k_opad, sizeof (k_opad));
explicit_bzero(k_opad, sizeof (k_opad));
}
void
crypto_mac_update(struct hmac_ctx *ctx, const void *data, size_t datasize)
{
SHA512_Update(&ctx->innerctx, data, datasize);
}
void
crypto_mac_final(struct hmac_ctx *ctx, void *md, size_t mdsize)
{
uint8_t digest[SHA512_DIGEST_LENGTH];
/* Complete inner hash */
SHA512_Final(digest, &ctx->innerctx);
/* Complete outer hash */
SHA512_Update(&ctx->outerctx, digest, sizeof (digest));
SHA512_Final(digest, &ctx->outerctx);
explicit_bzero(ctx, sizeof (*ctx));
/* mdsize == 0 means "Give me the whole hash!" */
if (mdsize == 0)
mdsize = SHA512_DIGEST_LENGTH;
bcopy(digest, md, mdsize);
explicit_bzero(digest, sizeof (digest));
}
void
crypto_mac(const crypto_key_t *key, const void *in_data, size_t in_data_size,
void *out_data, size_t out_data_size)
{
struct hmac_ctx ctx;
crypto_mac_init(&ctx, key);
crypto_mac_update(&ctx, in_data, in_data_size);
crypto_mac_final(&ctx, out_data, out_data_size);
}
static int
freebsd_zfs_crypt_done(struct cryptop *crp)
{
freebsd_crypt_session_t *ses;
ses = crp->crp_opaque;
mtx_lock(&ses->fs_lock);
ses->fs_done = true;
mtx_unlock(&ses->fs_lock);
wakeup(crp);
return (0);
}
void
freebsd_crypt_freesession(freebsd_crypt_session_t *sess)
{
mtx_destroy(&sess->fs_lock);
crypto_freesession(sess->fs_sid);
explicit_bzero(sess, sizeof (*sess));
}
static int
zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp)
{
int error;
crp->crp_opaque = session;
crp->crp_callback = freebsd_zfs_crypt_done;
for (;;) {
error = crypto_dispatch(crp);
if (error)
break;
mtx_lock(&session->fs_lock);
while (session->fs_done == false)
msleep(crp, &session->fs_lock, PRIBIO,
"zfs_crypto", hz/5);
mtx_unlock(&session->fs_lock);
if (crp->crp_etype != EAGAIN) {
error = crp->crp_etype;
break;
}
crp->crp_etype = 0;
crp->crp_flags &= ~CRYPTO_F_DONE;
session->fs_done = false;
#if __FreeBSD_version < 1300087
/*
* Session ID changed, so we should record that,
* and try again
*/
session->fs_sid = crp->crp_session;
#endif
}
return (error);
}
static void
freebsd_crypt_uio_debug_log(boolean_t encrypt,
freebsd_crypt_session_t *input_sessionp,
struct zio_crypt_info *c_info,
uio_t *data_uio,
crypto_key_t *key,
uint8_t *ivbuf,
size_t datalen,
size_t auth_len)
{
#ifdef FCRYPTO_DEBUG
struct cryptodesc *crd;
uint8_t *p = NULL;
size_t total = 0;
printf("%s(%s, %p, { %s, %d, %d, %s }, %p, { %d, %p, %u }, "
"%p, %u, %u)\n",
__FUNCTION__, encrypt ? "encrypt" : "decrypt", input_sessionp,
c_info->ci_algname, c_info->ci_crypt_type,
(unsigned int)c_info->ci_keylen, c_info->ci_name,
data_uio, key->ck_format, key->ck_data,
(unsigned int)key->ck_length,
ivbuf, (unsigned int)datalen, (unsigned int)auth_len);
printf("\tkey = { ");
for (int i = 0; i < key->ck_length / 8; i++) {
uint8_t *b = (uint8_t *)key->ck_data;
printf("%02x ", b[i]);
}
printf("}\n");
for (int i = 0; i < data_uio->uio_iovcnt; i++) {
printf("\tiovec #%d: <%p, %u>\n", i,
data_uio->uio_iov[i].iov_base,
(unsigned int)data_uio->uio_iov[i].iov_len);
total += data_uio->uio_iov[i].iov_len;
}
data_uio->uio_resid = total;
#endif
}
/*
* Create a new cryptographic session. This should
* happen every time the key changes (including when
* it's first loaded).
*/
#if __FreeBSD_version >= 1300087
int
freebsd_crypt_newsession(freebsd_crypt_session_t *sessp,
struct zio_crypt_info *c_info, crypto_key_t *key)
{
struct crypto_session_params csp;
int error = 0;
#ifdef FCRYPTO_DEBUG
printf("%s(%p, { %s, %d, %d, %s }, { %d, %p, %u })\n",
__FUNCTION__, sessp,
c_info->ci_algname, c_info->ci_crypt_type,
(unsigned int)c_info->ci_keylen, c_info->ci_name,
key->ck_format, key->ck_data, (unsigned int)key->ck_length);
printf("\tkey = { ");
for (int i = 0; i < key->ck_length / 8; i++) {
uint8_t *b = (uint8_t *)key->ck_data;
printf("%02x ", b[i]);
}
printf("}\n");
#endif
bzero(&csp, sizeof (csp));
csp.csp_mode = CSP_MODE_AEAD;
csp.csp_cipher_key = key->ck_data;
csp.csp_cipher_klen = key->ck_length / 8;
switch (c_info->ci_crypt_type) {
case ZC_TYPE_GCM:
csp.csp_cipher_alg = CRYPTO_AES_NIST_GCM_16;
csp.csp_ivlen = AES_GCM_IV_LEN;
switch (key->ck_length/8) {
case AES_128_GMAC_KEY_LEN:
case AES_192_GMAC_KEY_LEN:
case AES_256_GMAC_KEY_LEN:
break;
default:
error = EINVAL;
goto bad;
}
break;
case ZC_TYPE_CCM:
csp.csp_cipher_alg = CRYPTO_AES_CCM_16;
csp.csp_ivlen = AES_CCM_IV_LEN;
switch (key->ck_length/8) {
case AES_128_CBC_MAC_KEY_LEN:
case AES_192_CBC_MAC_KEY_LEN:
case AES_256_CBC_MAC_KEY_LEN:
break;
default:
error = EINVAL;
goto bad;
break;
}
break;
default:
error = ENOTSUP;
goto bad;
}
error = crypto_newsession(&sessp->fs_sid, &csp,
CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE);
mtx_init(&sessp->fs_lock, "FreeBSD Cryptographic Session Lock",
NULL, MTX_DEF);
crypt_sessions++;
bad:
#ifdef FCRYPTO_DEBUG
if (error)
printf("%s: returning error %d\n", __FUNCTION__, error);
#endif
return (error);
}
int
freebsd_crypt_uio(boolean_t encrypt,
freebsd_crypt_session_t *input_sessionp,
struct zio_crypt_info *c_info,
uio_t *data_uio,
crypto_key_t *key,
uint8_t *ivbuf,
size_t datalen,
size_t auth_len)
{
struct cryptop *crp;
freebsd_crypt_session_t *session = NULL;
int error = 0;
size_t total = 0;
freebsd_crypt_uio_debug_log(encrypt, input_sessionp, c_info, data_uio,
key, ivbuf, datalen, auth_len);
for (int i = 0; i < data_uio->uio_iovcnt; i++)
total += data_uio->uio_iov[i].iov_len;
data_uio->uio_resid = total;
if (input_sessionp == NULL) {
session = kmem_zalloc(sizeof (*session), KM_SLEEP);
error = freebsd_crypt_newsession(session, c_info, key);
if (error)
goto out;
} else
session = input_sessionp;
crp = crypto_getreq(session->fs_sid, M_WAITOK);
if (encrypt) {
crp->crp_op = CRYPTO_OP_ENCRYPT |
CRYPTO_OP_COMPUTE_DIGEST;
} else {
crp->crp_op = CRYPTO_OP_DECRYPT |
CRYPTO_OP_VERIFY_DIGEST;
}
crp->crp_flags = CRYPTO_F_CBIFSYNC | CRYPTO_F_IV_SEPARATE;
crp->crp_buf_type = CRYPTO_BUF_UIO;
crp->crp_uio = (void*)data_uio;
crp->crp_ilen = data_uio->uio_resid;
crp->crp_aad_start = 0;
crp->crp_aad_length = auth_len;
crp->crp_payload_start = auth_len;
crp->crp_payload_length = datalen;
crp->crp_digest_start = auth_len + datalen;
bcopy(ivbuf, crp->crp_iv, ZIO_DATA_IV_LEN);
error = zfs_crypto_dispatch(session, crp);
crypto_freereq(crp);
out:
#ifdef FCRYPTO_DEBUG
if (error)
printf("%s: returning error %d\n", __FUNCTION__, error);
#endif
if (input_sessionp == NULL) {
freebsd_crypt_freesession(session);
kmem_free(session, sizeof (*session));
}
return (error);
}
#else
int
freebsd_crypt_newsession(freebsd_crypt_session_t *sessp,
struct zio_crypt_info *c_info, crypto_key_t *key)
{
struct cryptoini cria, crie, *crip;
struct enc_xform *xform;
struct auth_hash *xauth;
int error = 0;
crypto_session_t sid;
#ifdef FCRYPTO_DEBUG
printf("%s(%p, { %s, %d, %d, %s }, { %d, %p, %u })\n",
__FUNCTION__, sessp,
c_info->ci_algname, c_info->ci_crypt_type,
(unsigned int)c_info->ci_keylen, c_info->ci_name,
key->ck_format, key->ck_data, (unsigned int)key->ck_length);
printf("\tkey = { ");
for (int i = 0; i < key->ck_length / 8; i++) {
uint8_t *b = (uint8_t *)key->ck_data;
printf("%02x ", b[i]);
}
printf("}\n");
#endif
switch (c_info->ci_crypt_type) {
case ZC_TYPE_GCM:
xform = &enc_xform_aes_nist_gcm;
switch (key->ck_length/8) {
case AES_128_GMAC_KEY_LEN:
xauth = &auth_hash_nist_gmac_aes_128;
break;
case AES_192_GMAC_KEY_LEN:
xauth = &auth_hash_nist_gmac_aes_192;
break;
case AES_256_GMAC_KEY_LEN:
xauth = &auth_hash_nist_gmac_aes_256;
break;
default:
error = EINVAL;
goto bad;
}
break;
case ZC_TYPE_CCM:
xform = &enc_xform_ccm;
switch (key->ck_length/8) {
case AES_128_CBC_MAC_KEY_LEN:
xauth = &auth_hash_ccm_cbc_mac_128;
break;
case AES_192_CBC_MAC_KEY_LEN:
xauth = &auth_hash_ccm_cbc_mac_192;
break;
case AES_256_CBC_MAC_KEY_LEN:
xauth = &auth_hash_ccm_cbc_mac_256;
break;
default:
error = EINVAL;
goto bad;
break;
}
break;
default:
error = ENOTSUP;
goto bad;
}
#ifdef FCRYPTO_DEBUG
printf("%s(%d): Using crypt %s (key length %u [%u bytes]), "
"auth %s (key length %d)\n",
__FUNCTION__, __LINE__,
xform->name, (unsigned int)key->ck_length,
(unsigned int)key->ck_length/8,
xauth->name, xauth->keysize);
#endif
bzero(&crie, sizeof (crie));
bzero(&cria, sizeof (cria));
crie.cri_alg = xform->type;
crie.cri_key = key->ck_data;
crie.cri_klen = key->ck_length;
cria.cri_alg = xauth->type;
cria.cri_key = key->ck_data;
cria.cri_klen = key->ck_length;
cria.cri_next = &crie;
crie.cri_next = NULL;
crip = &cria;
// Everything else is bzero'd
error = crypto_newsession(&sid, crip,
CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE);
if (error != 0) {
printf("%s(%d): crypto_newsession failed with %d\n",
__FUNCTION__, __LINE__, error);
goto bad;
}
sessp->fs_sid = sid;
mtx_init(&sessp->fs_lock, "FreeBSD Cryptographic Session Lock",
NULL, MTX_DEF);
crypt_sessions++;
bad:
return (error);
}
/*
* The meat of encryption/decryption.
* If sessp is NULL, then it will create a
* temporary cryptographic session, and release
* it when done.
*/
int
freebsd_crypt_uio(boolean_t encrypt,
freebsd_crypt_session_t *input_sessionp,
struct zio_crypt_info *c_info,
uio_t *data_uio,
crypto_key_t *key,
uint8_t *ivbuf,
size_t datalen,
size_t auth_len)
{
struct cryptop *crp;
struct cryptodesc *enc_desc, *auth_desc;
struct enc_xform *xform;
struct auth_hash *xauth;
freebsd_crypt_session_t *session = NULL;
int error;
freebsd_crypt_uio_debug_log(encrypt, input_sessionp, c_info, data_uio,
key, ivbuf, datalen, auth_len);
switch (c_info->ci_crypt_type) {
case ZC_TYPE_GCM:
xform = &enc_xform_aes_nist_gcm;
switch (key->ck_length/8) {
case AES_128_GMAC_KEY_LEN:
xauth = &auth_hash_nist_gmac_aes_128;
break;
case AES_192_GMAC_KEY_LEN:
xauth = &auth_hash_nist_gmac_aes_192;
break;
case AES_256_GMAC_KEY_LEN:
xauth = &auth_hash_nist_gmac_aes_256;
break;
default:
error = EINVAL;
goto bad;
}
break;
case ZC_TYPE_CCM:
xform = &enc_xform_ccm;
switch (key->ck_length/8) {
case AES_128_CBC_MAC_KEY_LEN:
xauth = &auth_hash_ccm_cbc_mac_128;
break;
case AES_192_CBC_MAC_KEY_LEN:
xauth = &auth_hash_ccm_cbc_mac_192;
break;
case AES_256_CBC_MAC_KEY_LEN:
xauth = &auth_hash_ccm_cbc_mac_256;
break;
default:
error = EINVAL;
goto bad;
break;
}
break;
default:
error = ENOTSUP;
goto bad;
}
#ifdef FCRYPTO_DEBUG
printf("%s(%d): Using crypt %s (key length %u [%u bytes]), "
"auth %s (key length %d)\n",
__FUNCTION__, __LINE__,
xform->name, (unsigned int)key->ck_length,
(unsigned int)key->ck_length/8,
xauth->name, xauth->keysize);
#endif
if (input_sessionp == NULL) {
session = kmem_zalloc(sizeof (*session), KM_SLEEP);
error = freebsd_crypt_newsession(session, c_info, key);
if (error)
goto out;
} else
session = input_sessionp;
crp = crypto_getreq(2);
if (crp == NULL) {
error = ENOMEM;
goto bad;
}
auth_desc = crp->crp_desc;
enc_desc = auth_desc->crd_next;
crp->crp_session = session->fs_sid;
crp->crp_ilen = auth_len + datalen;
crp->crp_buf = (void*)data_uio;
crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIFSYNC;
auth_desc->crd_skip = 0;
auth_desc->crd_len = auth_len;
auth_desc->crd_inject = auth_len + datalen;
auth_desc->crd_alg = xauth->type;
#ifdef FCRYPTO_DEBUG
printf("%s: auth: skip = %u, len = %u, inject = %u\n",
__FUNCTION__, auth_desc->crd_skip, auth_desc->crd_len,
auth_desc->crd_inject);
#endif
enc_desc->crd_skip = auth_len;
enc_desc->crd_len = datalen;
enc_desc->crd_inject = auth_len;
enc_desc->crd_alg = xform->type;
enc_desc->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT;
bcopy(ivbuf, enc_desc->crd_iv, ZIO_DATA_IV_LEN);
enc_desc->crd_next = NULL;
#ifdef FCRYPTO_DEBUG
printf("%s: enc: skip = %u, len = %u, inject = %u\n",
__FUNCTION__, enc_desc->crd_skip, enc_desc->crd_len,
enc_desc->crd_inject);
#endif
if (encrypt)
enc_desc->crd_flags |= CRD_F_ENCRYPT;
error = zfs_crypto_dispatch(session, crp);
crypto_freereq(crp);
out:
if (input_sessionp == NULL) {
freebsd_crypt_freesession(session);
kmem_free(session, sizeof (*session));
}
bad:
#ifdef FCRYPTO_DEBUG
if (error)
printf("%s: returning error %d\n", __FUNCTION__, error);
#endif
return (error);
}
#endif
+346
View File
@@ -0,0 +1,346 @@
/*
* Copyright (c) 2020 iXsystems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_prop.h>
#include <sys/dmu_zfetch.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/sa.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <sys/zfs_rlock.h>
#include <sys/racct.h>
#include <sys/vm.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_vnops.h>
#ifndef IDX_TO_OFF
#define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif
#if __FreeBSD_version < 1300051
#define VM_ALLOC_BUSY_FLAGS VM_ALLOC_NOBUSY
#else
#define VM_ALLOC_BUSY_FLAGS VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY
#endif
#if __FreeBSD_version < 1300072
#define dmu_page_lock(m) vm_page_lock(m)
#define dmu_page_unlock(m) vm_page_unlock(m)
#else
#define dmu_page_lock(m)
#define dmu_page_unlock(m)
#endif
static int
dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
{
dnode_t *dn;
int err;
err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
numbufsp, dbpp, DMU_READ_PREFETCH);
dnode_rele(dn, FTAG);
return (err);
}
int
dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
vm_page_t *ma, dmu_tx_t *tx)
{
dmu_buf_t **dbp;
struct sf_buf *sf;
int numbufs, i;
int err;
if (size == 0)
return (0);
err = dmu_buf_hold_array(os, object, offset, size,
FALSE, FTAG, &numbufs, &dbp);
if (err)
return (err);
for (i = 0; i < numbufs; i++) {
int tocpy, copied, thiscpy;
int bufoff;
dmu_buf_t *db = dbp[i];
caddr_t va;
ASSERT(size > 0);
ASSERT3U(db->db_size, >=, PAGESIZE);
bufoff = offset - db->db_offset;
tocpy = (int)MIN(db->db_size - bufoff, size);
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
if (tocpy == db->db_size)
dmu_buf_will_fill(db, tx);
else
dmu_buf_will_dirty(db, tx);
for (copied = 0; copied < tocpy; copied += PAGESIZE) {
ASSERT3U(ptoa((*ma)->pindex), ==,
db->db_offset + bufoff);
thiscpy = MIN(PAGESIZE, tocpy - copied);
va = zfs_map_page(*ma, &sf);
bcopy(va, (char *)db->db_data + bufoff, thiscpy);
zfs_unmap_page(sf);
ma += 1;
bufoff += PAGESIZE;
}
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
offset += tocpy;
size -= tocpy;
}
dmu_buf_rele_array(dbp, numbufs, FTAG);
return (err);
}
int
dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
int *rbehind, int *rahead, int last_size)
{
struct sf_buf *sf;
vm_object_t vmobj;
vm_page_t m;
dmu_buf_t **dbp;
dmu_buf_t *db;
caddr_t va;
int numbufs, i;
int bufoff, pgoff, tocpy;
int mi, di;
int err;
ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
ASSERT(last_size <= PAGE_SIZE);
err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
if (err != 0)
return (err);
#ifdef DEBUG
IMPLY(last_size < PAGE_SIZE, *rahead == 0);
if (dbp[0]->db_offset != 0 || numbufs > 1) {
for (i = 0; i < numbufs; i++) {
ASSERT(ISP2(dbp[i]->db_size));
ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0);
ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
}
}
#endif
vmobj = ma[0]->object;
zfs_vmobject_wlock(vmobj);
db = dbp[0];
for (i = 0; i < *rbehind; i++) {
m = vm_page_grab(vmobj, ma[0]->pindex - 1 - i,
VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
if (m == NULL)
break;
if (!vm_page_none_valid(m)) {
ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
vm_page_do_sunbusy(m);
break;
}
ASSERT(m->dirty == 0);
ASSERT(!pmap_page_is_mapped(m));
ASSERT(db->db_size > PAGE_SIZE);
bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
va = zfs_map_page(m, &sf);
bcopy((char *)db->db_data + bufoff, va, PAGESIZE);
zfs_unmap_page(sf);
vm_page_valid(m);
dmu_page_lock(m);
if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
vm_page_activate(m);
else
vm_page_deactivate(m);
dmu_page_unlock(m);
vm_page_do_sunbusy(m);
}
*rbehind = i;
bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
pgoff = 0;
for (mi = 0, di = 0; mi < count && di < numbufs; ) {
if (pgoff == 0) {
m = ma[mi];
if (m != bogus_page) {
vm_page_assert_xbusied(m);
ASSERT(vm_page_none_valid(m));
ASSERT(m->dirty == 0);
ASSERT(!pmap_page_is_mapped(m));
va = zfs_map_page(m, &sf);
}
}
if (bufoff == 0)
db = dbp[di];
if (m != bogus_page) {
ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
db->db_offset + bufoff);
}
/*
* We do not need to clamp the copy size by the file
* size as the last block is zero-filled beyond the
* end of file anyway.
*/
tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
if (m != bogus_page)
bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy);
pgoff += tocpy;
ASSERT(pgoff <= PAGESIZE);
if (pgoff == PAGESIZE) {
if (m != bogus_page) {
zfs_unmap_page(sf);
vm_page_valid(m);
}
ASSERT(mi < count);
mi++;
pgoff = 0;
}
bufoff += tocpy;
ASSERT(bufoff <= db->db_size);
if (bufoff == db->db_size) {
ASSERT(di < numbufs);
di++;
bufoff = 0;
}
}
#ifdef DEBUG
/*
* Three possibilities:
* - last requested page ends at a buffer boundary and , thus,
* all pages and buffers have been iterated;
* - all requested pages are filled, but the last buffer
* has not been exhausted;
* the read-ahead is possible only in this case;
* - all buffers have been read, but the last page has not been
* fully filled;
* this is only possible if the file has only a single buffer
* with a size that is not a multiple of the page size.
*/
if (mi == count) {
ASSERT(di >= numbufs - 1);
IMPLY(*rahead != 0, di == numbufs - 1);
IMPLY(*rahead != 0, bufoff != 0);
ASSERT(pgoff == 0);
}
if (di == numbufs) {
ASSERT(mi >= count - 1);
ASSERT(*rahead == 0);
IMPLY(pgoff == 0, mi == count);
if (pgoff != 0) {
ASSERT(mi == count - 1);
ASSERT((dbp[0]->db_size & PAGE_MASK) != 0);
}
}
#endif
if (pgoff != 0) {
ASSERT(m != bogus_page);
bzero(va + pgoff, PAGESIZE - pgoff);
zfs_unmap_page(sf);
vm_page_valid(m);
}
for (i = 0; i < *rahead; i++) {
m = vm_page_grab(vmobj, ma[count - 1]->pindex + 1 + i,
VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
if (m == NULL)
break;
if (!vm_page_none_valid(m)) {
ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
vm_page_do_sunbusy(m);
break;
}
ASSERT(m->dirty == 0);
ASSERT(!pmap_page_is_mapped(m));
ASSERT(db->db_size > PAGE_SIZE);
bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
tocpy = MIN(db->db_size - bufoff, PAGESIZE);
va = zfs_map_page(m, &sf);
bcopy((char *)db->db_data + bufoff, va, tocpy);
if (tocpy < PAGESIZE) {
ASSERT(i == *rahead - 1);
ASSERT((db->db_size & PAGE_MASK) != 0);
bzero(va + tocpy, PAGESIZE - tocpy);
}
zfs_unmap_page(sf);
vm_page_valid(m);
dmu_page_lock(m);
if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
vm_page_activate(m);
else
vm_page_deactivate(m);
dmu_page_unlock(m);
vm_page_do_sunbusy(m);
}
*rahead = i;
zfs_vmobject_wunlock(vmobj);
dmu_buf_rele_array(dbp, numbufs, FTAG);
return (0);
}
+102
View File
@@ -0,0 +1,102 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2017, Datto, Inc. All rights reserved.
*/
#include <sys/dmu.h>
#include <sys/hkdf.h>
#include <sys/freebsd_crypto.h>
#include <sys/hkdf.h>
static int
hkdf_sha512_extract(uint8_t *salt, uint_t salt_len, uint8_t *key_material,
uint_t km_len, uint8_t *out_buf)
{
crypto_key_t key;
/* initialize the salt as a crypto key */
key.ck_format = CRYPTO_KEY_RAW;
key.ck_length = CRYPTO_BYTES2BITS(salt_len);
key.ck_data = salt;
crypto_mac(&key, key_material, km_len, out_buf, SHA512_DIGEST_LENGTH);
return (0);
}
static int
hkdf_sha512_expand(uint8_t *extract_key, uint8_t *info, uint_t info_len,
uint8_t *out_buf, uint_t out_len)
{
struct hmac_ctx ctx;
crypto_key_t key;
uint_t i, T_len = 0, pos = 0;
uint8_t c;
uint_t N = (out_len + SHA512_DIGEST_LENGTH) / SHA512_DIGEST_LENGTH;
uint8_t T[SHA512_DIGEST_LENGTH];
if (N > 255)
return (SET_ERROR(EINVAL));
/* initialize the salt as a crypto key */
key.ck_format = CRYPTO_KEY_RAW;
key.ck_length = CRYPTO_BYTES2BITS(SHA512_DIGEST_LENGTH);
key.ck_data = extract_key;
for (i = 1; i <= N; i++) {
c = i;
crypto_mac_init(&ctx, &key);
crypto_mac_update(&ctx, T, T_len);
crypto_mac_update(&ctx, info, info_len);
crypto_mac_update(&ctx, &c, 1);
crypto_mac_final(&ctx, T, SHA512_DIGEST_LENGTH);
bcopy(T, out_buf + pos,
(i != N) ? SHA512_DIGEST_LENGTH : (out_len - pos));
pos += SHA512_DIGEST_LENGTH;
}
return (0);
}
/*
* HKDF is designed to be a relatively fast function for deriving keys from a
* master key + a salt. We use this function to generate new encryption keys
* so as to avoid hitting the cryptographic limits of the underlying
* encryption modes. Note that, for the sake of deriving encryption keys, the
* info parameter is called the "salt" everywhere else in the code.
*/
int
hkdf_sha512(uint8_t *key_material, uint_t km_len, uint8_t *salt,
uint_t salt_len, uint8_t *info, uint_t info_len, uint8_t *output_key,
uint_t out_len)
{
int ret;
uint8_t extract_key[SHA512_DIGEST_LENGTH];
ret = hkdf_sha512_extract(salt, salt_len, key_material, km_len,
extract_key);
if (ret != 0)
return (ret);
ret = hkdf_sha512_expand(extract_key, info, info_len, output_key,
out_len);
if (ret != 0)
return (ret);
return (0);
}
+404
View File
@@ -0,0 +1,404 @@
/*
* Copyright (c) 2020 iXsystems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/eventhandler.h>
#include <sys/cmn_err.h>
#include <sys/stat.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
#include <sys/zap.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev.h>
#include <sys/dmu.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/fm/util.h>
#include <sys/sunddi.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/nvpair.h>
#include <sys/mount.h>
#include <sys/taskqueue.h>
#include <sys/sdt.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_onexit.h>
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_send.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_bookmark.h>
#include <sys/dsl_userhold.h>
#include <sys/zfeature.h>
#include <sys/zcp.h>
#include <sys/zio_checksum.h>
#include <sys/vdev_removal.h>
#include <sys/dsl_crypt.h>
#include <sys/zfs_ioctl_compat.h>
#include <sys/zfs_ioctl_impl.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
#include "zfs_deleg.h"
#include "zfs_comutil.h"
SYSCTL_DECL(_vfs_zfs);
SYSCTL_DECL(_vfs_zfs_vdev);
static int zfs_version_ioctl = ZFS_IOCVER_ZOF;
SYSCTL_DECL(_vfs_zfs_version);
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, ioctl, CTLFLAG_RD, &zfs_version_ioctl,
0, "ZFS_IOCTL_VERSION");
static struct cdev *zfsdev;
extern void zfs_init(void);
extern void zfs_fini(void);
extern void zfs_ioctl_init(void);
static struct root_hold_token *zfs_root_token;
extern uint_t rrw_tsd_key;
extern uint_t zfs_allow_log_key;
extern uint_t zfs_geom_probe_vdev_key;
static int zfs__init(void);
static int zfs__fini(void);
static void zfs_shutdown(void *, int);
static eventhandler_tag zfs_shutdown_event_tag;
extern zfsdev_state_t *zfsdev_state_list;
#define ZFS_MIN_KSTACK_PAGES 4
static void
zfs_cmd_bsd12_to_zof(zfs_cmd_legacy_t *src, zfs_cmd_t *dst)
{
memcpy(dst, src, offsetof(zfs_cmd_t, zc_objset_stats));
*&dst->zc_objset_stats = *&src->zc_objset_stats;
memcpy(&dst->zc_begin_record, &src->zc_begin_record,
offsetof(zfs_cmd_t, zc_sendobj) -
offsetof(zfs_cmd_t, zc_begin_record));
memcpy(&dst->zc_sendobj, &src->zc_sendobj,
sizeof (zfs_cmd_t) - 8 - offsetof(zfs_cmd_t, zc_sendobj));
dst->zc_zoneid = src->zc_jailid;
}
static void
zfs_cmd_zof_to_bsd12(zfs_cmd_t *src, zfs_cmd_legacy_t *dst)
{
memcpy(dst, src, offsetof(zfs_cmd_t, zc_objset_stats));
*&dst->zc_objset_stats = *&src->zc_objset_stats;
memcpy(&dst->zc_begin_record, &src->zc_begin_record,
offsetof(zfs_cmd_t, zc_sendobj) -
offsetof(zfs_cmd_t, zc_begin_record));
memcpy(&dst->zc_sendobj, &src->zc_sendobj,
sizeof (zfs_cmd_t) - 8 - offsetof(zfs_cmd_t, zc_sendobj));
dst->zc_jailid = src->zc_zoneid;
}
static int
zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
struct thread *td)
{
uint_t len, vecnum;
zfs_iocparm_t *zp;
zfs_cmd_t *zc;
zfs_cmd_legacy_t *zcl;
int rc, error;
void *uaddr;
len = IOCPARM_LEN(zcmd);
vecnum = zcmd & 0xff;
zp = (void *)arg;
uaddr = (void *)zp->zfs_cmd;
error = 0;
zcl = NULL;
if (len != sizeof (zfs_iocparm_t)) {
printf("len %d vecnum: %d sizeof (zfs_cmd_t) %lu\n",
len, vecnum, sizeof (zfs_cmd_t));
return (EINVAL);
}
zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
/*
* Remap ioctl code for legacy user binaries
*/
if (zp->zfs_ioctl_version == ZFS_IOCVER_FREEBSD) {
if (vecnum >= sizeof (zfs_ioctl_bsd12_to_zof)/sizeof (long)) {
kmem_free(zc, sizeof (zfs_cmd_t));
return (ENOTSUP);
}
zcl = kmem_zalloc(sizeof (zfs_cmd_legacy_t), KM_SLEEP);
vecnum = zfs_ioctl_bsd12_to_zof[vecnum];
if (copyin(uaddr, zcl, sizeof (zfs_cmd_legacy_t))) {
error = SET_ERROR(EFAULT);
goto out;
}
zfs_cmd_bsd12_to_zof(zcl, zc);
} else if (copyin(uaddr, zc, sizeof (zfs_cmd_t))) {
error = SET_ERROR(EFAULT);
goto out;
}
error = zfsdev_ioctl_common(vecnum, zc);
if (zcl) {
zfs_cmd_zof_to_bsd12(zc, zcl);
rc = copyout(zcl, uaddr, sizeof (*zcl));
} else {
rc = copyout(zc, uaddr, sizeof (*zc));
}
if (error == 0 && rc != 0)
error = SET_ERROR(EFAULT);
out:
if (zcl)
kmem_free(zcl, sizeof (zfs_cmd_legacy_t));
kmem_free(zc, sizeof (zfs_cmd_t));
return (error);
}
static void
zfsdev_close(void *data)
{
zfsdev_state_t *zs, *zsp = data;
mutex_enter(&zfsdev_state_lock);
for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
if (zs == zsp)
break;
}
if (zs == NULL || zs->zs_minor <= 0) {
mutex_exit(&zfsdev_state_lock);
return;
}
zs->zs_minor = -1;
zfs_onexit_destroy(zs->zs_onexit);
zfs_zevent_destroy(zs->zs_zevent);
mutex_exit(&zfsdev_state_lock);
}
static int
zfs_ctldev_init(struct cdev *devp)
{
boolean_t newzs = B_FALSE;
minor_t minor;
zfsdev_state_t *zs, *zsprev = NULL;
ASSERT(MUTEX_HELD(&zfsdev_state_lock));
minor = zfsdev_minor_alloc();
if (minor == 0)
return (SET_ERROR(ENXIO));
for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
if (zs->zs_minor == -1)
break;
zsprev = zs;
}
if (!zs) {
zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
newzs = B_TRUE;
}
devfs_set_cdevpriv(zs, zfsdev_close);
zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
if (newzs) {
zs->zs_minor = minor;
wmb();
zsprev->zs_next = zs;
} else {
wmb();
zs->zs_minor = minor;
}
return (0);
}
static int
zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td)
{
int error;
mutex_enter(&zfsdev_state_lock);
error = zfs_ctldev_init(devp);
mutex_exit(&zfsdev_state_lock);
return (error);
}
static struct cdevsw zfs_cdevsw = {
.d_version = D_VERSION,
.d_open = zfsdev_open,
.d_ioctl = zfsdev_ioctl,
.d_name = ZFS_DRIVER
};
int
zfsdev_attach(void)
{
zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
ZFS_DRIVER);
return (0);
}
void
zfsdev_detach(void)
{
if (zfsdev != NULL)
destroy_dev(zfsdev);
}
int
zfs__init(void)
{
int error;
#if KSTACK_PAGES < ZFS_MIN_KSTACK_PAGES
printf("ZFS NOTICE: KSTACK_PAGES is %d which could result in stack "
"overflow panic!\nPlease consider adding "
"'options KSTACK_PAGES=%d' to your kernel config\n", KSTACK_PAGES,
ZFS_MIN_KSTACK_PAGES);
#endif
zfs_root_token = root_mount_hold("ZFS");
if ((error = zfs_kmod_init()) != 0) {
printf("ZFS: Failed to Load ZFS Filesystem"
", rc = %d\n", error);
root_mount_rel(zfs_root_token);
return (error);
}
tsd_create(&zfs_geom_probe_vdev_key, NULL);
printf("ZFS storage pool version: features support ("
SPA_VERSION_STRING ")\n");
root_mount_rel(zfs_root_token);
ddi_sysevent_init();
return (0);
}
int
zfs__fini(void)
{
if (zfs_busy() || zvol_busy() ||
zio_injection_enabled) {
return (EBUSY);
}
zfs_kmod_fini();
tsd_destroy(&zfs_geom_probe_vdev_key);
return (0);
}
static void
zfs_shutdown(void *arg __unused, int howto __unused)
{
/*
* ZFS fini routines can not properly work in a panic-ed system.
*/
if (panicstr == NULL)
zfs__fini();
}
static int
zfs_modevent(module_t mod, int type, void *unused __unused)
{
int err;
switch (type) {
case MOD_LOAD:
err = zfs__init();
if (err == 0)
zfs_shutdown_event_tag = EVENTHANDLER_REGISTER(
shutdown_post_sync, zfs_shutdown, NULL,
SHUTDOWN_PRI_FIRST);
return (err);
case MOD_UNLOAD:
err = zfs__fini();
if (err == 0 && zfs_shutdown_event_tag != NULL)
EVENTHANDLER_DEREGISTER(shutdown_post_sync,
zfs_shutdown_event_tag);
return (err);
case MOD_SHUTDOWN:
return (0);
default:
break;
}
return (EOPNOTSUPP);
}
static moduledata_t zfs_mod = {
"zfsctrl",
zfs_modevent,
0
};
#ifdef _KERNEL
EVENTHANDLER_DEFINE(mountroot, spa_boot_init, NULL, 0);
#endif
DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_CLOCKS, SI_ORDER_ANY);
MODULE_VERSION(zfsctrl, 1);
MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
MODULE_DEPEND(zfsctrl, crypto, 1, 1, 1);
MODULE_DEPEND(zfsctrl, cryptodev, 1, 1, 1);
+280
View File
@@ -0,0 +1,280 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/fm/fs/zfs.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/ddt.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_removal.h>
#include <sys/vdev_indirect_mapping.h>
#include <sys/vdev_indirect_births.h>
#include <sys/metaslab.h>
#include <sys/metaslab_impl.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/bpobj.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_objset.h>
#include <sys/unique.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/fs/zfs.h>
#include <sys/arc.h>
#include <sys/callb.h>
#include <sys/spa_boot.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_send.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_userhold.h>
#include <sys/zfeature.h>
#include <sys/zvol.h>
#include <sys/abd.h>
#include <sys/callb.h>
#include <sys/zone.h>
#include "zfs_prop.h"
#include "zfs_comutil.h"
extern int vdev_geom_read_pool_label(const char *name, nvlist_t ***configs,
uint64_t *count);
static nvlist_t *
spa_generate_rootconf(const char *name)
{
nvlist_t **configs, **tops;
nvlist_t *config;
nvlist_t *best_cfg, *nvtop, *nvroot;
uint64_t *holes;
uint64_t best_txg;
uint64_t nchildren;
uint64_t pgid;
uint64_t count;
uint64_t i;
uint_t nholes;
if (vdev_geom_read_pool_label(name, &configs, &count) != 0)
return (NULL);
ASSERT3U(count, !=, 0);
best_txg = 0;
for (i = 0; i < count; i++) {
uint64_t txg;
VERIFY(nvlist_lookup_uint64(configs[i], ZPOOL_CONFIG_POOL_TXG,
&txg) == 0);
if (txg > best_txg) {
best_txg = txg;
best_cfg = configs[i];
}
}
nchildren = 1;
nvlist_lookup_uint64(best_cfg, ZPOOL_CONFIG_VDEV_CHILDREN, &nchildren);
holes = NULL;
nvlist_lookup_uint64_array(best_cfg, ZPOOL_CONFIG_HOLE_ARRAY,
&holes, &nholes);
tops = kmem_zalloc(nchildren * sizeof (void *), KM_SLEEP);
for (i = 0; i < nchildren; i++) {
if (i >= count)
break;
if (configs[i] == NULL)
continue;
VERIFY(nvlist_lookup_nvlist(configs[i], ZPOOL_CONFIG_VDEV_TREE,
&nvtop) == 0);
nvlist_dup(nvtop, &tops[i], KM_SLEEP);
}
for (i = 0; holes != NULL && i < nholes; i++) {
if (i >= nchildren)
continue;
if (tops[holes[i]] != NULL)
continue;
nvlist_alloc(&tops[holes[i]], NV_UNIQUE_NAME, KM_SLEEP);
VERIFY(nvlist_add_string(tops[holes[i]], ZPOOL_CONFIG_TYPE,
VDEV_TYPE_HOLE) == 0);
VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_ID,
holes[i]) == 0);
VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_GUID,
0) == 0);
}
for (i = 0; i < nchildren; i++) {
if (tops[i] != NULL)
continue;
nvlist_alloc(&tops[i], NV_UNIQUE_NAME, KM_SLEEP);
VERIFY(nvlist_add_string(tops[i], ZPOOL_CONFIG_TYPE,
VDEV_TYPE_MISSING) == 0);
VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_ID,
i) == 0);
VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_GUID,
0) == 0);
}
/*
* Create pool config based on the best vdev config.
*/
nvlist_dup(best_cfg, &config, KM_SLEEP);
/*
* Put this pool's top-level vdevs into a root vdev.
*/
VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
&pgid) == 0);
VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
VDEV_TYPE_ROOT) == 0);
VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0);
VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
tops, nchildren) == 0);
/*
* Replace the existing vdev_tree with the new root vdev in
* this pool's configuration (remove the old, add the new).
*/
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
/*
* Drop vdev config elements that should not be present at pool level.
*/
nvlist_remove(config, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64);
nvlist_remove(config, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64);
for (i = 0; i < count; i++)
nvlist_free(configs[i]);
kmem_free(configs, count * sizeof (void *));
for (i = 0; i < nchildren; i++)
nvlist_free(tops[i]);
kmem_free(tops, nchildren * sizeof (void *));
nvlist_free(nvroot);
return (config);
}
int
spa_import_rootpool(const char *name)
{
spa_t *spa;
vdev_t *rvd;
nvlist_t *config, *nvtop;
uint64_t txg;
char *pname;
int error;
/*
* Read the label from the boot device and generate a configuration.
*/
config = spa_generate_rootconf(name);
mutex_enter(&spa_namespace_lock);
if (config != NULL) {
VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
&pname) == 0 && strcmp(name, pname) == 0);
VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg)
== 0);
if ((spa = spa_lookup(pname)) != NULL) {
/*
* The pool could already be imported,
* e.g., after reboot -r.
*/
if (spa->spa_state == POOL_STATE_ACTIVE) {
mutex_exit(&spa_namespace_lock);
nvlist_free(config);
return (0);
}
/*
* Remove the existing root pool from the namespace so
* that we can replace it with the correct config
* we just read in.
*/
spa_remove(spa);
}
spa = spa_add(pname, config, NULL);
/*
* Set spa_ubsync.ub_version as it can be used in vdev_alloc()
* via spa_version().
*/
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
&spa->spa_ubsync.ub_version) != 0)
spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL;
} else if ((spa = spa_lookup(name)) == NULL) {
mutex_exit(&spa_namespace_lock);
nvlist_free(config);
cmn_err(CE_NOTE, "Cannot find the pool label for '%s'",
name);
return (EIO);
} else {
VERIFY(nvlist_dup(spa->spa_config, &config, KM_SLEEP) == 0);
}
spa->spa_is_root = B_TRUE;
spa->spa_import_flags = ZFS_IMPORT_VERBATIM;
/*
* Build up a vdev tree based on the boot device's label config.
*/
VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvtop) == 0);
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
VDEV_ALLOC_ROOTPOOL);
spa_config_exit(spa, SCL_ALL, FTAG);
if (error) {
mutex_exit(&spa_namespace_lock);
nvlist_free(config);
cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
pname);
return (error);
}
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
vdev_free(rvd);
spa_config_exit(spa, SCL_ALL, FTAG);
mutex_exit(&spa_namespace_lock);
nvlist_free(config);
return (0);
}
const char *
spa_history_zone(void)
{
return ("freebsd");
}
+114
View File
@@ -0,0 +1,114 @@
/*
* Copyright (c) 2020 iXsystems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/spa.h>
#include <zfs_comutil.h>
void
spa_stats_init(spa_t *spa)
{
}
void
spa_stats_destroy(spa_t *spa)
{
}
void
spa_iostats_trim_add(spa_t *spa, trim_type_t type,
uint64_t extents_written, uint64_t bytes_written,
uint64_t extents_skipped, uint64_t bytes_skipped,
uint64_t extents_failed, uint64_t bytes_failed)
{
}
void
spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
{
}
void
spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
{
}
/*
* Set txg state completion time and increment current state.
*/
int
spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
hrtime_t completed_time)
{
return (0);
}
txg_stat_t *
spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp)
{
return (NULL);
}
void
spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts)
{
}
void
spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
{
}
void
spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id,
int error)
{
}
int
spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
hrtime_t duration)
{
return (0);
}
int
spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id)
{
return (0);
}
+699
View File
@@ -0,0 +1,699 @@
/*
* Copyright (c) 2020 iXsystems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/stat.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
#include <sys/zap.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev.h>
#include <sys/dmu.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/sunddi.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/nvpair.h>
#include <sys/mount.h>
#include <sys/taskqueue.h>
#include <sys/sdt.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_onexit.h>
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_send.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_bookmark.h>
#include <sys/dsl_userhold.h>
#include <sys/zfeature.h>
#include <sys/zcp.h>
#include <sys/zio_checksum.h>
#include <sys/vdev_removal.h>
#include <sys/dsl_crypt.h>
#include <sys/zfs_ioctl_compat.h>
#include <sys/zfs_context.h>
#include <sys/arc_impl.h>
#include <sys/dsl_pool.h>
#include <../zfs_config.h>
/* BEGIN CSTYLED */
SYSCTL_DECL(_vfs_zfs);
SYSCTL_NODE(_vfs_zfs, OID_AUTO, zevent, CTLFLAG_RW, 0, "ZFS events");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, zil, CTLFLAG_RW, 0, "ZFS ZIL");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RW, 0, "ZFS TRIM");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, spa, CTLFLAG_RW, 0, "space allocation");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, reconstruct, CTLFLAG_RW, 0, "reconstruct");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, prefetch, CTLFLAG_RW, 0, "ZFS ZFETCH");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, multihost, CTLFLAG_RW, 0, "multihost protection");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, mg, CTLFLAG_RW, 0, "metaslab group");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, metaslab, CTLFLAG_RW, 0, "ZFS metaslab");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, lua, CTLFLAG_RW, 0, "lua");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, l2arc, CTLFLAG_RW, 0, "l2arc");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf, CTLFLAG_RW, 0, "ZFS disk buf cache");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf_cache, CTLFLAG_RW, 0, "ZFS disk buf cache");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, deadman, CTLFLAG_RW, 0, "ZFS deadman");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, condense, CTLFLAG_RW, 0, "ZFS condense");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, arc, CTLFLAG_RW, 0, "ZFS Adaptive Replacement Cache");
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0,
"ZFS VDEV Mirror");
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, cache, CTLFLAG_RW, 0, "ZFS VDEV Cache");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, livelist, CTLFLAG_RW, 0, "livelist state");
SYSCTL_NODE(_vfs_zfs_livelist, OID_AUTO, condense, CTLFLAG_RW, 0, "condense knobs");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, recv, CTLFLAG_RW, 0, "receive knobs");
SYSCTL_NODE(_vfs_zfs, OID_AUTO, send, CTLFLAG_RW, 0, "send knobs");
SYSCTL_DECL(_vfs_zfs_version);
SYSCTL_CONST_STRING(_vfs_zfs_version, OID_AUTO, module, CTLFLAG_RD,
(ZFS_META_VERSION "-" ZFS_META_RELEASE), "OpenZFS module version");
extern arc_state_t ARC_anon;
extern arc_state_t ARC_mru;
extern arc_state_t ARC_mru_ghost;
extern arc_state_t ARC_mfu;
extern arc_state_t ARC_mfu_ghost;
extern arc_state_t ARC_l2c_only;
/*
* minimum lifespan of a prefetch block in clock ticks
* (initialized in arc_init())
*/
/* arc.c */
/* legacy compat */
extern unsigned long l2arc_write_max; /* def max write size */
extern unsigned long l2arc_write_boost; /* extra warmup write */
extern unsigned long l2arc_headroom; /* # of dev writes */
extern unsigned long l2arc_headroom_boost;
extern unsigned long l2arc_feed_secs; /* interval seconds */
extern unsigned long l2arc_feed_min_ms; /* min interval msecs */
extern int l2arc_noprefetch; /* don't cache prefetch bufs */
extern int l2arc_feed_again; /* turbo warmup */
extern int l2arc_norw; /* no reads during writes */
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, CTLFLAG_RW,
&l2arc_write_max, 0, "max write size (LEGACY)");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, CTLFLAG_RW,
&l2arc_write_boost, 0, "extra write during warmup (LEGACY)");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, CTLFLAG_RW,
&l2arc_headroom, 0, "number of dev writes (LEGACY)");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, CTLFLAG_RW,
&l2arc_feed_secs, 0, "interval seconds (LEGACY)");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, CTLFLAG_RW,
&l2arc_feed_min_ms, 0, "min interval milliseconds (LEGACY)");
SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, CTLFLAG_RW,
&l2arc_noprefetch, 0, "don't cache prefetch bufs (LEGACY)");
SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, CTLFLAG_RW,
&l2arc_feed_again, 0, "turbo warmup (LEGACY)");
SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RW,
&l2arc_norw, 0, "no reads during writes (LEGACY)");
#if 0
extern int zfs_compressed_arc_enabled;
SYSCTL_INT(_vfs_zfs, OID_AUTO, compressed_arc_enabled, CTLFLAG_RW,
&zfs_compressed_arc_enabled, 1, "compressed arc buffers (LEGACY)");
#endif
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD,
&ARC_anon.arcs_size.rc_count, 0, "size of anonymous state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
&ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
"size of anonymous state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
&ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
"size of anonymous state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD,
&ARC_mru.arcs_size.rc_count, 0, "size of mru state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
&ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
"size of metadata in mru state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
&ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
"size of data in mru state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD,
&ARC_mru_ghost.arcs_size.rc_count, 0, "size of mru ghost state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
&ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
"size of metadata in mru ghost state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
&ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
"size of data in mru ghost state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD,
&ARC_mfu.arcs_size.rc_count, 0, "size of mfu state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
&ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
"size of metadata in mfu state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
&ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
"size of data in mfu state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD,
&ARC_mfu_ghost.arcs_size.rc_count, 0, "size of mfu ghost state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
&ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
"size of metadata in mfu ghost state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
&ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
"size of data in mfu ghost state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,
&ARC_l2c_only.arcs_size.rc_count, 0, "size of mru state");
extern int arc_no_grow_shift;
extern int arc_shrink_shift;
extern arc_stats_t arc_stats;
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
#define arc_p ARCSTAT(arcstat_p) /* target size of MRU */
#define arc_c ARCSTAT(arcstat_c) /* target size of cache */
#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */
#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */
#define arc_no_grow ARCSTAT(arcstat_no_grow) /* do not grow cache size */
#define arc_tempreserve ARCSTAT(arcstat_tempreserve)
#define arc_loaned_bytes ARCSTAT(arcstat_loaned_bytes)
#define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */
#define arc_dnode_limit ARCSTAT(arcstat_dnode_limit) /* max size for dnodes */
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */
static int
sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
{
uint32_t val;
int err;
val = arc_no_grow_shift;
err = sysctl_handle_32(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
if (val >= arc_shrink_shift)
return (EINVAL);
arc_no_grow_shift = val;
return (0);
}
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift, CTLTYPE_U32 | CTLFLAG_RWTUN,
0, sizeof (uint32_t), sysctl_vfs_zfs_arc_no_grow_shift, "U",
"log2(fraction of ARC which must be free to allow growing)");
/* dbuf.c */
/* dmu.c */
/* dmu_zfetch.c */
SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH");
/* max bytes to prefetch per stream (default 8MB) */
extern uint32_t zfetch_max_distance;
SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance, CTLFLAG_RWTUN,
&zfetch_max_distance, 0, "Max bytes to prefetch per stream (LEGACY)");
/* max bytes to prefetch indirects for per stream (default 64MB) */
extern uint32_t zfetch_max_idistance;
SYSCTL_UINT(_vfs_zfs_prefetch, OID_AUTO, max_idistance, CTLFLAG_RWTUN,
&zfetch_max_idistance, 0, "Max bytes to prefetch indirects for per stream");
/* dsl_pool.c */
/* dnode.c */
extern int zfs_default_bs;
SYSCTL_INT(_vfs_zfs, OID_AUTO, default_bs, CTLFLAG_RWTUN,
&zfs_default_bs, 0, "Default dnode block shift");
extern int zfs_default_ibs;
SYSCTL_INT(_vfs_zfs, OID_AUTO, default_ibs, CTLFLAG_RWTUN,
&zfs_default_ibs, 0, "Default dnode indirect block shift");
/* dsl_scan.c */
/* metaslab.c */
/*
* Enable/disable lba weighting (i.e. outer tracks are given preference).
*/
extern boolean_t metaslab_lba_weighting_enabled;
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, lba_weighting, CTLFLAG_RWTUN,
&metaslab_lba_weighting_enabled, 0,
"Enable LBA weighting (i.e. outer tracks are given preference)");
/*
* In pools where the log space map feature is not enabled we touch
* multiple metaslabs (and their respective space maps) with each
* transaction group. Thus, we benefit from having a small space map
* block size since it allows us to issue more I/O operations scattered
* around the disk. So a sane default for the space map block size
* is 8~16K.
*/
extern int zfs_metaslab_sm_blksz_no_log;
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_no_log, CTLFLAG_RDTUN,
&zfs_metaslab_sm_blksz_no_log, 0,
"Block size for space map in pools with log space map disabled. "
"Power of 2 and greater than 4096.");
/*
* When the log space map feature is enabled, we accumulate a lot of
* changes per metaslab that are flushed once in a while so we benefit
* from a bigger block size like 128K for the metaslab space maps.
*/
extern int zfs_metaslab_sm_blksz_with_log;
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log, CTLFLAG_RDTUN,
&zfs_metaslab_sm_blksz_with_log, 0,
"Block size for space map in pools with log space map enabled. "
"Power of 2 and greater than 4096.");
/*
* The in-core space map representation is more compact than its on-disk form.
* The zfs_condense_pct determines how much more compact the in-core
* space map representation must be before we compact it on-disk.
* Values should be greater than or equal to 100.
*/
extern int zfs_condense_pct;
SYSCTL_INT(_vfs_zfs, OID_AUTO, condense_pct, CTLFLAG_RWTUN,
&zfs_condense_pct, 0,
"Condense on-disk spacemap when it is more than this many percents"
" of in-memory counterpart");
extern int zfs_remove_max_segment;
SYSCTL_INT(_vfs_zfs, OID_AUTO, remove_max_segment, CTLFLAG_RWTUN,
&zfs_remove_max_segment, 0, "Largest contiguous segment ZFS will attempt to"
" allocate when removing a device");
extern int zfs_removal_suspend_progress;
SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, CTLFLAG_RWTUN,
&zfs_removal_suspend_progress, 0, "Ensures certain actions can happen while"
" in the middle of a removal");
/*
* Minimum size which forces the dynamic allocator to change
* it's allocation strategy. Once the space map cannot satisfy
* an allocation of this size then it switches to using more
* aggressive strategy (i.e search by size rather than offset).
*/
extern uint64_t metaslab_df_alloc_threshold;
SYSCTL_QUAD(_vfs_zfs_metaslab, OID_AUTO, df_alloc_threshold, CTLFLAG_RWTUN,
&metaslab_df_alloc_threshold, 0,
"Minimum size which forces the dynamic allocator to change it's allocation strategy");
/*
* The minimum free space, in percent, which must be available
* in a space map to continue allocations in a first-fit fashion.
* Once the space map's free space drops below this level we dynamically
* switch to using best-fit allocations.
*/
extern int metaslab_df_free_pct;
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct, CTLFLAG_RWTUN,
&metaslab_df_free_pct, 0,
"The minimum free space, in percent, which must be available in a "
"space map to continue allocations in a first-fit fashion");
/*
* Percentage of all cpus that can be used by the metaslab taskq.
*/
extern int metaslab_load_pct;
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct, CTLFLAG_RWTUN,
&metaslab_load_pct, 0,
"Percentage of cpus that can be used by the metaslab taskq");
/*
* Max number of metaslabs per group to preload.
*/
extern int metaslab_preload_limit;
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, preload_limit, CTLFLAG_RWTUN,
&metaslab_preload_limit, 0,
"Max number of metaslabs per group to preload");
/* refcount.c */
extern int reference_tracking_enable;
SYSCTL_INT(_vfs_zfs, OID_AUTO, reference_tracking_enable, CTLFLAG_RDTUN,
&reference_tracking_enable, 0,
"Track reference holders to refcount_t objects, used mostly by ZFS");
/* spa.c */
extern int zfs_ccw_retry_interval;
SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RWTUN,
&zfs_ccw_retry_interval, 0,
"Configuration cache file write, retry after failure, interval (seconds)");
extern uint64_t zfs_max_missing_tvds_cachefile;
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile, CTLFLAG_RWTUN,
&zfs_max_missing_tvds_cachefile, 0,
"allow importing pools with missing top-level vdevs in cache file");
extern uint64_t zfs_max_missing_tvds_scan;
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan, CTLFLAG_RWTUN,
&zfs_max_missing_tvds_scan, 0,
"allow importing pools with missing top-level vdevs during scan");
/* spa_misc.c */
extern int zfs_flags;
static int
sysctl_vfs_zfs_debug_flags(SYSCTL_HANDLER_ARGS)
{
int err, val;
val = zfs_flags;
err = sysctl_handle_int(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
/*
* ZFS_DEBUG_MODIFY must be enabled prior to boot so all
* arc buffers in the system have the necessary additional
* checksum data. However, it is safe to disable at any
* time.
*/
if (!(zfs_flags & ZFS_DEBUG_MODIFY))
val &= ~ZFS_DEBUG_MODIFY;
zfs_flags = val;
return (0);
}
SYSCTL_PROC(_vfs_zfs, OID_AUTO, debugflags,
CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, NULL, 0,
sysctl_vfs_zfs_debug_flags, "IU", "Debug flags for ZFS testing.");
int
param_set_deadman_synctime(SYSCTL_HANDLER_ARGS)
{
unsigned long val;
int err;
val = zfs_deadman_synctime_ms;
err = sysctl_handle_long(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
zfs_deadman_synctime_ms = val;
spa_set_deadman_synctime(MSEC2NSEC(zfs_deadman_synctime_ms));
return (0);
}
int
param_set_deadman_ziotime(SYSCTL_HANDLER_ARGS)
{
unsigned long val;
int err;
val = zfs_deadman_ziotime_ms;
err = sysctl_handle_long(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
zfs_deadman_ziotime_ms = val;
spa_set_deadman_ziotime(MSEC2NSEC(zfs_deadman_synctime_ms));
return (0);
}
int
param_set_deadman_failmode(SYSCTL_HANDLER_ARGS)
{
char buf[16];
int rc;
if (req->newptr == NULL)
strlcpy(buf, zfs_deadman_failmode, sizeof (buf));
rc = sysctl_handle_string(oidp, buf, sizeof (buf), req);
if (rc || req->newptr == NULL)
return (rc);
if (strcmp(buf, zfs_deadman_failmode) == 0)
return (0);
if (!strcmp(buf, "wait"))
zfs_deadman_failmode = "wait";
if (!strcmp(buf, "continue"))
zfs_deadman_failmode = "continue";
if (!strcmp(buf, "panic"))
zfs_deadman_failmode = "panic";
return (-param_set_deadman_failmode_common(buf));
}
/* spacemap.c */
extern int space_map_ibs;
SYSCTL_INT(_vfs_zfs, OID_AUTO, space_map_ibs, CTLFLAG_RWTUN,
&space_map_ibs, 0, "Space map indirect block shift");
/* vdev.c */
#ifdef notyet
extern uint64_t zfs_max_auto_ashift;
extern uint64_t zfs_min_auto_ashift;
static int
sysctl_vfs_zfs_max_auto_ashift(SYSCTL_HANDLER_ARGS)
{
uint64_t val;
int err;
val = zfs_max_auto_ashift;
err = sysctl_handle_64(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
if (val > ASHIFT_MAX || val < zfs_min_auto_ashift)
return (EINVAL);
zfs_max_auto_ashift = val;
return (0);
}
SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof (uint64_t),
sysctl_vfs_zfs_max_auto_ashift, "QU",
"Max ashift used when optimising for logical -> physical sectors size on "
"new top-level vdevs.");
static int
sysctl_vfs_zfs_min_auto_ashift(SYSCTL_HANDLER_ARGS)
{
uint64_t val;
int err;
val = zfs_min_auto_ashift;
err = sysctl_handle_64(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
if (val < ASHIFT_MIN || val > zfs_max_auto_ashift)
return (EINVAL);
zfs_min_auto_ashift = val;
return (0);
}
SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof (uint64_t),
sysctl_vfs_zfs_min_auto_ashift, "QU",
"Min ashift used when creating new top-level vdevs.");
#endif
/*
* Since the DTL space map of a vdev is not expected to have a lot of
* entries, we default its block size to 4K.
*/
extern int zfs_vdev_dtl_sm_blksz;
SYSCTL_INT(_vfs_zfs, OID_AUTO, dtl_sm_blksz, CTLFLAG_RDTUN,
&zfs_vdev_dtl_sm_blksz, 0,
"Block size for DTL space map. Power of 2 and greater than 4096.");
/*
* vdev-wide space maps that have lots of entries written to them at
* the end of each transaction can benefit from a higher I/O bandwidth
* (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
*/
extern int zfs_vdev_standard_sm_blksz;
SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz, CTLFLAG_RDTUN,
&zfs_vdev_standard_sm_blksz, 0,
"Block size for standard space map. Power of 2 and greater than 4096.");
extern int vdev_validate_skip;
SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip, CTLFLAG_RDTUN,
&vdev_validate_skip, 0,
"Enable to bypass vdev_validate().");
/* vdev_cache.c */
/* vdev_mirror.c */
/*
* The load configuration settings below are tuned by default for
* the case where all devices are of the same rotational type.
*
* If there is a mixture of rotating and non-rotating media, setting
* non_rotating_seek_inc to 0 may well provide better results as it
* will direct more reads to the non-rotating vdevs which are more
* likely to have a higher performance.
*/
/* vdev_queue.c */
#define ZFS_VDEV_QUEUE_KNOB_MIN(name) \
extern uint32_t zfs_vdev_ ## name ## _min_active; \
SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _min_active, CTLFLAG_RWTUN,\
&zfs_vdev_ ## name ## _min_active, 0, \
"Initial number of I/O requests of type " #name \
" active for each device");
#define ZFS_VDEV_QUEUE_KNOB_MAX(name) \
extern uint32_t zfs_vdev_ ## name ## _max_active; \
SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _max_active, CTLFLAG_RWTUN, \
&zfs_vdev_ ## name ## _max_active, 0, \
"Maximum number of I/O requests of type " #name \
" active for each device");
#undef ZFS_VDEV_QUEUE_KNOB
extern uint32_t zfs_vdev_max_active;
SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RWTUN,
&zfs_vdev_max_active, 0,
"The maximum number of I/Os of all types active for each device. (LEGACY)");
extern int zfs_vdev_def_queue_depth;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, def_queue_depth, CTLFLAG_RWTUN,
&zfs_vdev_def_queue_depth, 0,
"Default queue depth for each allocator");
/*extern uint64_t zfs_multihost_history;
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, multihost_history, CTLFLAG_RWTUN,
&zfs_multihost_history, 0,
"Historical staticists for the last N multihost updates");*/
#ifdef notyet
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, trim_on_init, CTLFLAG_RW,
&vdev_trim_on_init, 0, "Enable/disable full vdev trim on initialisation");
#endif
/* zio.c */
#if defined(__LP64__)
int zio_use_uma = 1;
#else
int zio_use_uma = 0;
#endif
SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, use_uma, CTLFLAG_RDTUN, &zio_use_uma, 0,
"Use uma(9) for ZIO allocations");
SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata, CTLFLAG_RDTUN, &zio_exclude_metadata, 0,
"Exclude metadata buffers from dumps as well");
int
param_set_arc_long(SYSCTL_HANDLER_ARGS)
{
int err;
err = sysctl_handle_long(oidp, arg1, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
arc_tuning_update(B_TRUE);
return (0);
}
int
param_set_arc_int(SYSCTL_HANDLER_ARGS)
{
int err;
err = sysctl_handle_int(oidp, arg1, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
arc_tuning_update(B_TRUE);
return (0);
}
int
param_set_slop_shift(SYSCTL_HANDLER_ARGS)
{
int val;
int err;
val = *(int *)arg1;
err = sysctl_handle_int(oidp, &val, 0, req);
if (err != 0 || req->newptr == NULL)
return (err);
if (val < 1 || val > 31)
return (EINVAL);
*(int *)arg1 = val;
return (0);
}
+326
View File
@@ -0,0 +1,326 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/vdev_file.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
#include <sys/fs/zfs.h>
#include <sys/fm/fs/zfs.h>
#include <sys/abd.h>
#include <sys/stat.h>
/*
* Virtual device vector for files.
*/
static taskq_t *vdev_file_taskq;
void
vdev_file_init(void)
{
vdev_file_taskq = taskq_create("z_vdev_file", MAX(max_ncpus, 16),
minclsyspri, max_ncpus, INT_MAX, 0);
}
void
vdev_file_fini(void)
{
taskq_destroy(vdev_file_taskq);
}
static void
vdev_file_hold(vdev_t *vd)
{
ASSERT(vd->vdev_path != NULL);
}
static void
vdev_file_rele(vdev_t *vd)
{
ASSERT(vd->vdev_path != NULL);
}
static mode_t
vdev_file_open_mode(spa_mode_t spa_mode)
{
mode_t mode = 0;
if ((spa_mode & SPA_MODE_READ) && (spa_mode & SPA_MODE_WRITE)) {
mode = O_RDWR;
} else if (spa_mode & SPA_MODE_READ) {
mode = O_RDONLY;
} else if (spa_mode & SPA_MODE_WRITE) {
mode = O_WRONLY;
}
return (mode | O_LARGEFILE);
}
static int
vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
uint64_t *ashift)
{
vdev_file_t *vf;
zfs_file_t *fp;
zfs_file_attr_t zfa;
int error;
/*
* Rotational optimizations only make sense on block devices.
*/
vd->vdev_nonrot = B_TRUE;
/*
* Allow TRIM on file based vdevs. This may not always be supported,
* since it depends on your kernel version and underlying filesystem
* type but it is always safe to attempt.
*/
vd->vdev_has_trim = B_TRUE;
/*
* Disable secure TRIM on file based vdevs. There is no way to
* request this behavior from the underlying filesystem.
*/
vd->vdev_has_securetrim = B_FALSE;
/*
* We must have a pathname, and it must be absolute.
*/
if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
return (SET_ERROR(EINVAL));
}
/*
* Reopen the device if it's not currently open. Otherwise,
* just update the physical size of the device.
*/
if (vd->vdev_tsd != NULL) {
ASSERT(vd->vdev_reopening);
vf = vd->vdev_tsd;
goto skip_open;
}
vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
/*
* We always open the files from the root of the global zone, even if
* we're in a local zone. If the user has gotten to this point, the
* administrator has already decided that the pool should be available
* to local zone users, so the underlying devices should be as well.
*/
ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
error = zfs_file_open(vd->vdev_path,
vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
if (error) {
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
return (error);
}
vf->vf_file = fp;
#ifdef _KERNEL
/*
* Make sure it's a regular file.
*/
if (zfs_file_getattr(fp, &zfa)) {
return (SET_ERROR(ENODEV));
}
if (!S_ISREG(zfa.zfa_mode)) {
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
return (SET_ERROR(ENODEV));
}
#endif
skip_open:
error = zfs_file_getattr(vf->vf_file, &zfa);
if (error) {
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
return (error);
}
*max_psize = *psize = zfa.zfa_size;
*ashift = SPA_MINBLOCKSHIFT;
return (0);
}
static void
vdev_file_close(vdev_t *vd)
{
vdev_file_t *vf = vd->vdev_tsd;
if (vd->vdev_reopening || vf == NULL)
return;
if (vf->vf_file != NULL) {
zfs_file_close(vf->vf_file);
}
vd->vdev_delayed_close = B_FALSE;
kmem_free(vf, sizeof (vdev_file_t));
vd->vdev_tsd = NULL;
}
/*
* Implements the interrupt side for file vdev types. This routine will be
* called when the I/O completes allowing us to transfer the I/O to the
* interrupt taskqs. For consistency, the code structure mimics disk vdev
* types.
*/
static void
vdev_file_io_intr(zio_t *zio)
{
zio_delay_interrupt(zio);
}
static void
vdev_file_io_strategy(void *arg)
{
zio_t *zio = arg;
vdev_t *vd = zio->io_vd;
vdev_file_t *vf;
void *buf;
ssize_t resid;
loff_t off;
ssize_t size;
int err;
off = zio->io_offset;
size = zio->io_size;
resid = 0;
vf = vd->vdev_tsd;
ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
if (zio->io_type == ZIO_TYPE_READ) {
buf = abd_borrow_buf(zio->io_abd, zio->io_size);
err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
abd_return_buf_copy(zio->io_abd, buf, size);
} else {
buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
abd_return_buf(zio->io_abd, buf, size);
}
if (resid != 0 && zio->io_error == 0)
zio->io_error = ENOSPC;
vdev_file_io_intr(zio);
}
static void
vdev_file_io_start(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
vdev_file_t *vf = vd->vdev_tsd;
if (zio->io_type == ZIO_TYPE_IOCTL) {
/* XXPOLICY */
if (!vdev_readable(vd)) {
zio->io_error = SET_ERROR(ENXIO);
zio_interrupt(zio);
return;
}
switch (zio->io_cmd) {
case DKIOCFLUSHWRITECACHE:
zio->io_error = zfs_file_fsync(vf->vf_file,
O_SYNC|O_DSYNC);
break;
default:
zio->io_error = SET_ERROR(ENOTSUP);
}
zio_execute(zio);
return;
} else if (zio->io_type == ZIO_TYPE_TRIM) {
#ifdef notyet
int mode = 0;
ASSERT3U(zio->io_size, !=, 0);
/* XXX FreeBSD has no fallocate routine in file ops */
zio->io_error = zfs_file_fallocate(vf->vf_file,
mode, zio->io_offset, zio->io_size);
#endif
zio->io_error = SET_ERROR(ENOTSUP);
zio_execute(zio);
return;
}
ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
zio->io_target_timestamp = zio_handle_io_delay(zio);
VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
TQ_SLEEP), !=, 0);
}
/* ARGSUSED */
static void
vdev_file_io_done(zio_t *zio)
{
}
vdev_ops_t vdev_file_ops = {
vdev_file_open,
vdev_file_close,
vdev_default_asize,
vdev_file_io_start,
vdev_file_io_done,
NULL,
NULL,
vdev_file_hold,
vdev_file_rele,
NULL,
vdev_default_xlate,
VDEV_TYPE_FILE, /* name of this vdev type */
B_TRUE /* leaf vdev */
};
/*
* From userland we access disks just like files.
*/
#ifndef _KERNEL
vdev_ops_t vdev_disk_ops = {
vdev_file_open,
vdev_file_close,
vdev_default_asize,
vdev_file_io_start,
vdev_file_io_done,
NULL,
NULL,
vdev_file_hold,
vdev_file_rele,
NULL,
vdev_default_xlate,
VDEV_TYPE_DISK, /* name of this vdev type */
B_TRUE /* leaf vdev */
};
#endif
File diff suppressed because it is too large Load Diff
+74
View File
@@ -0,0 +1,74 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
#include <sys/zap.h>
#include <sys/vdev.h>
#include <sys/vdev_os.h>
#include <sys/vdev_impl.h>
#include <sys/uberblock_impl.h>
#include <sys/metaslab.h>
#include <sys/metaslab_impl.h>
#include <sys/zio.h>
#include <sys/dsl_scan.h>
#include <sys/abd.h>
#include <sys/fs/zfs.h>
int
vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size)
{
spa_t *spa = vd->vdev_spa;
zio_t *zio;
abd_t *pad2;
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
int error;
if (size > VDEV_PAD_SIZE)
return (EINVAL);
if (!vd->vdev_ops->vdev_op_leaf)
return (ENODEV);
if (vdev_is_dead(vd))
return (ENXIO);
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE);
abd_zero(pad2, VDEV_PAD_SIZE);
abd_copy_from_buf(pad2, buf, size);
retry:
zio = zio_root(spa, NULL, NULL, flags);
vdev_label_write(zio, vd, 0, pad2,
offsetof(vdev_label_t, vl_pad2),
VDEV_PAD_SIZE, NULL, NULL, flags);
error = zio_wait(zio);
if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
flags |= ZIO_FLAG_TRYHARD;
goto retry;
}
abd_free(pad2);
return (error);
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+254
View File
@@ -0,0 +1,254 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/kstat.h>
typedef struct zfs_dbgmsg {
list_node_t zdm_node;
time_t zdm_timestamp;
int zdm_size;
char zdm_msg[1]; /* variable length allocation */
} zfs_dbgmsg_t;
list_t zfs_dbgmsgs;
int zfs_dbgmsg_size = 0;
kmutex_t zfs_dbgmsgs_lock;
int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
kstat_t *zfs_dbgmsg_kstat;
/*
* Internal ZFS debug messages are enabled by default.
*
* # Print debug messages
* cat /proc/spl/kstat/zfs/dbgmsg
*
* # Disable the kernel debug message log.
* echo 0 > /sys/module/zfs/parameters/zfs_dbgmsg_enable
*
* # Clear the kernel debug message log.
* echo 0 >/proc/spl/kstat/zfs/dbgmsg
*/
int zfs_dbgmsg_enable = 1;
static int
zfs_dbgmsg_headers(char *buf, size_t size)
{
(void) snprintf(buf, size, "%-12s %-8s\n", "timestamp", "message");
return (0);
}
static int
zfs_dbgmsg_data(char *buf, size_t size, void *data)
{
zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)data;
(void) snprintf(buf, size, "%-12llu %-s\n",
(u_longlong_t)zdm->zdm_timestamp, zdm->zdm_msg);
return (0);
}
static void *
zfs_dbgmsg_addr(kstat_t *ksp, loff_t n)
{
zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)ksp->ks_private;
ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
if (n == 0)
ksp->ks_private = list_head(&zfs_dbgmsgs);
else if (zdm)
ksp->ks_private = list_next(&zfs_dbgmsgs, zdm);
return (ksp->ks_private);
}
static void
zfs_dbgmsg_purge(int max_size)
{
zfs_dbgmsg_t *zdm;
int size;
ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
while (zfs_dbgmsg_size > max_size) {
zdm = list_remove_head(&zfs_dbgmsgs);
if (zdm == NULL)
return;
size = zdm->zdm_size;
kmem_free(zdm, size);
zfs_dbgmsg_size -= size;
}
}
static int
zfs_dbgmsg_update(kstat_t *ksp, int rw)
{
if (rw == KSTAT_WRITE)
zfs_dbgmsg_purge(0);
return (0);
}
void
zfs_dbgmsg_init(void)
{
list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
offsetof(zfs_dbgmsg_t, zdm_node));
mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
zfs_dbgmsg_kstat = kstat_create("zfs", 0, "dbgmsg", "misc",
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
if (zfs_dbgmsg_kstat) {
zfs_dbgmsg_kstat->ks_lock = &zfs_dbgmsgs_lock;
zfs_dbgmsg_kstat->ks_ndata = UINT32_MAX;
zfs_dbgmsg_kstat->ks_private = NULL;
zfs_dbgmsg_kstat->ks_update = zfs_dbgmsg_update;
kstat_set_raw_ops(zfs_dbgmsg_kstat, zfs_dbgmsg_headers,
zfs_dbgmsg_data, zfs_dbgmsg_addr);
kstat_install(zfs_dbgmsg_kstat);
}
}
void
zfs_dbgmsg_fini(void)
{
if (zfs_dbgmsg_kstat)
kstat_delete(zfs_dbgmsg_kstat);
/*
* TODO - decide how to make this permanent
*/
#ifdef _KERNEL
mutex_enter(&zfs_dbgmsgs_lock);
zfs_dbgmsg_purge(0);
mutex_exit(&zfs_dbgmsgs_lock);
mutex_destroy(&zfs_dbgmsgs_lock);
#endif
}
void
__zfs_dbgmsg(char *buf)
{
zfs_dbgmsg_t *zdm;
int size;
DTRACE_PROBE1(zfs__dbgmsg, char *, buf);
size = sizeof (zfs_dbgmsg_t) + strlen(buf);
zdm = kmem_zalloc(size, KM_SLEEP);
zdm->zdm_size = size;
zdm->zdm_timestamp = gethrestime_sec();
strcpy(zdm->zdm_msg, buf);
mutex_enter(&zfs_dbgmsgs_lock);
list_insert_tail(&zfs_dbgmsgs, zdm);
zfs_dbgmsg_size += size;
zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
mutex_exit(&zfs_dbgmsgs_lock);
}
void
__set_error(const char *file, const char *func, int line, int err)
{
/*
* To enable this:
*
* $ echo 512 >/sys/module/zfs/parameters/zfs_flags
*/
if (zfs_flags & ZFS_DEBUG_SET_ERROR)
__dprintf(B_FALSE, file, func, line, "error %lu", err);
}
#ifdef _KERNEL
void
__dprintf(boolean_t dprint, const char *file, const char *func,
int line, const char *fmt, ...)
{
const char *newfile;
va_list adx;
size_t size;
char *buf;
char *nl;
int i;
size = 1024;
buf = kmem_alloc(size, KM_SLEEP);
/*
* Get rid of annoying prefix to filename.
*/
newfile = strrchr(file, '/');
if (newfile != NULL) {
newfile = newfile + 1; /* Get rid of leading / */
} else {
newfile = file;
}
i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func);
if (i < size) {
va_start(adx, fmt);
(void) vsnprintf(buf + i, size - i, fmt, adx);
va_end(adx);
}
/*
* Get rid of trailing newline.
*/
nl = strrchr(buf, '\n');
if (nl != NULL)
*nl = '\0';
__zfs_dbgmsg(buf);
kmem_free(buf, size);
}
#else
void
zfs_dbgmsg_print(const char *tag)
{
zfs_dbgmsg_t *zdm;
(void) printf("ZFS_DBGMSG(%s):\n", tag);
mutex_enter(&zfs_dbgmsgs_lock);
for (zdm = list_head(&zfs_dbgmsgs); zdm;
zdm = list_next(&zfs_dbgmsgs, zdm))
(void) printf("%s\n", zdm->zdm_msg);
mutex_exit(&zfs_dbgmsgs_lock);
}
#endif /* _KERNEL */
#ifdef _KERNEL
module_param(zfs_dbgmsg_enable, int, 0644);
MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log");
module_param(zfs_dbgmsg_maxsize, int, 0644);
MODULE_PARM_DESC(zfs_dbgmsg_maxsize, "Maximum ZFS debug log size");
#endif
+961
View File
@@ -0,0 +1,961 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc.
*/
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/extdirent.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/sunddi.h>
#include <sys/random.h>
#include <sys/policy.h>
#include <sys/condvar.h>
#include <sys/callb.h>
#include <sys/smp.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_acl.h>
#include <sys/fs/zfs.h>
#include <sys/zap.h>
#include <sys/dmu.h>
#include <sys/atomic.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/sa.h>
#include <sys/zfs_sa.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
/*
* zfs_match_find() is used by zfs_dirent_lookup() to peform zap lookups
* of names after deciding which is the appropriate lookup interface.
*/
static int
zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, const char *name,
matchtype_t mt, uint64_t *zoid)
{
int error;
if (zfsvfs->z_norm) {
/*
* In the non-mixed case we only expect there would ever
* be one match, but we need to use the normalizing lookup.
*/
error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
zoid, mt, NULL, 0, NULL);
} else {
error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
}
*zoid = ZFS_DIRENT_OBJ(*zoid);
return (error);
}
/*
* Look up a directory entry under a locked vnode.
* dvp being locked gives us a guarantee that there are no concurrent
* modification of the directory and, thus, if a node can be found in
* the directory, then it must not be unlinked.
*
* Input arguments:
* dzp - znode for directory
* name - name of entry to lock
* flag - ZNEW: if the entry already exists, fail with EEXIST.
* ZEXISTS: if the entry does not exist, fail with ENOENT.
* ZXATTR: we want dzp's xattr directory
*
* Output arguments:
* zpp - pointer to the znode for the entry (NULL if there isn't one)
*
* Return value: 0 on success or errno on failure.
*
* NOTE: Always checks for, and rejects, '.' and '..'.
*/
int
zfs_dirent_lookup(znode_t *dzp, const char *name, znode_t **zpp, int flag)
{
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
znode_t *zp;
matchtype_t mt = 0;
uint64_t zoid;
int error = 0;
if (zfsvfs->z_replay == B_FALSE)
ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
*zpp = NULL;
/*
* Verify that we are not trying to lock '.', '..', or '.zfs'
*/
if (name[0] == '.' &&
(((name[1] == '\0') || (name[1] == '.' && name[2] == '\0')) ||
(zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)))
return (SET_ERROR(EEXIST));
/*
* Case sensitivity and normalization preferences are set when
* the file system is created. These are stored in the
* zfsvfs->z_case and zfsvfs->z_norm fields. These choices
* affect how we perform zap lookups.
*
* When matching we may need to normalize & change case according to
* FS settings.
*
* Note that a normalized match is necessary for a case insensitive
* filesystem when the lookup request is not exact because normalization
* can fold case independent of normalizing code point sequences.
*
* See the table above zfs_dropname().
*/
if (zfsvfs->z_norm != 0) {
mt = MT_NORMALIZE;
/*
* Determine if the match needs to honor the case specified in
* lookup, and if so keep track of that so that during
* normalization we don't fold case.
*/
if (zfsvfs->z_case == ZFS_CASE_MIXED) {
mt |= MT_MATCH_CASE;
}
}
/*
* Only look in or update the DNLC if we are looking for the
* name on a file system that does not require normalization
* or case folding. We can also look there if we happen to be
* on a non-normalizing, mixed sensitivity file system IF we
* are looking for the exact name.
*
* NB: we do not need to worry about this flag for ZFS_CASE_SENSITIVE
* because in that case MT_EXACT and MT_FIRST should produce exactly
* the same result.
*/
if (dzp->z_unlinked && !(flag & ZXATTR))
return (ENOENT);
if (flag & ZXATTR) {
error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
sizeof (zoid));
if (error == 0)
error = (zoid == 0 ? ENOENT : 0);
} else {
error = zfs_match_find(zfsvfs, dzp, name, mt, &zoid);
}
if (error) {
if (error != ENOENT || (flag & ZEXISTS)) {
return (error);
}
} else {
if (flag & ZNEW) {
return (SET_ERROR(EEXIST));
}
error = zfs_zget(zfsvfs, zoid, &zp);
if (error)
return (error);
ASSERT(!zp->z_unlinked);
*zpp = zp;
}
return (0);
}
static int
zfs_dd_lookup(znode_t *dzp, znode_t **zpp)
{
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
znode_t *zp;
uint64_t parent;
int error;
if (zfsvfs->z_replay == B_FALSE)
ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock));
if (dzp->z_unlinked)
return (ENOENT);
if ((error = sa_lookup(dzp->z_sa_hdl,
SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
return (error);
error = zfs_zget(zfsvfs, parent, &zp);
if (error == 0)
*zpp = zp;
return (error);
}
int
zfs_dirlook(znode_t *dzp, const char *name, znode_t **zpp)
{
zfsvfs_t *zfsvfs __unused = dzp->z_zfsvfs;
znode_t *zp = NULL;
int error = 0;
#ifdef ZFS_DEBUG
if (zfsvfs->z_replay == B_FALSE)
ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock));
#endif
if (dzp->z_unlinked)
return (SET_ERROR(ENOENT));
if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
*zpp = dzp;
} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
error = zfs_dd_lookup(dzp, &zp);
if (error == 0)
*zpp = zp;
} else {
error = zfs_dirent_lookup(dzp, name, &zp, ZEXISTS);
if (error == 0) {
dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
*zpp = zp;
}
}
return (error);
}
/*
* unlinked Set (formerly known as the "delete queue") Error Handling
*
* When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
* don't specify the name of the entry that we will be manipulating. We
* also fib and say that we won't be adding any new entries to the
* unlinked set, even though we might (this is to lower the minimum file
* size that can be deleted in a full filesystem). So on the small
* chance that the nlink list is using a fat zap (ie. has more than
* 2000 entries), we *may* not pre-read a block that's needed.
* Therefore it is remotely possible for some of the assertions
* regarding the unlinked set below to fail due to i/o error. On a
* nondebug system, this will result in the space being leaked.
*/
void
zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
ASSERT(zp->z_unlinked);
ASSERT(zp->z_links == 0);
VERIFY3U(0, ==,
zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
}
/*
* Clean up any znodes that had no links when we either crashed or
* (force) umounted the file system.
*/
void
zfs_unlinked_drain(zfsvfs_t *zfsvfs)
{
zap_cursor_t zc;
zap_attribute_t zap;
dmu_object_info_t doi;
znode_t *zp;
dmu_tx_t *tx;
int error;
/*
* Interate over the contents of the unlinked set.
*/
for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
zap_cursor_retrieve(&zc, &zap) == 0;
zap_cursor_advance(&zc)) {
/*
* See what kind of object we have in list
*/
error = dmu_object_info(zfsvfs->z_os,
zap.za_first_integer, &doi);
if (error != 0)
continue;
ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
(doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
/*
* We need to re-mark these list entries for deletion,
* so we pull them back into core and set zp->z_unlinked.
*/
error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
/*
* We may pick up znodes that are already marked for deletion.
* This could happen during the purge of an extended attribute
* directory. All we need to do is skip over them, since they
* are already in the system marked z_unlinked.
*/
if (error != 0)
continue;
vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY);
/*
* Due to changes in zfs_rmnode we need to make sure the
* link count is set to zero here.
*/
if (zp->z_links != 0) {
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error != 0) {
dmu_tx_abort(tx);
vput(ZTOV(zp));
continue;
}
zp->z_links = 0;
VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
&zp->z_links, sizeof (zp->z_links), tx));
dmu_tx_commit(tx);
}
zp->z_unlinked = B_TRUE;
vput(ZTOV(zp));
}
zap_cursor_fini(&zc);
}
/*
* Delete the entire contents of a directory. Return a count
* of the number of entries that could not be deleted. If we encounter
* an error, return a count of at least one so that the directory stays
* in the unlinked set.
*
* NOTE: this function assumes that the directory is inactive,
* so there is no need to lock its entries before deletion.
* Also, it assumes the directory contents is *only* regular
* files.
*/
static int
zfs_purgedir(znode_t *dzp)
{
zap_cursor_t zc;
zap_attribute_t zap;
znode_t *xzp;
dmu_tx_t *tx;
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
int skipped = 0;
int error;
for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
(error = zap_cursor_retrieve(&zc, &zap)) == 0;
zap_cursor_advance(&zc)) {
error = zfs_zget(zfsvfs,
ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
if (error) {
skipped += 1;
continue;
}
vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
ASSERT((ZTOV(xzp)->v_type == VREG) ||
(ZTOV(xzp)->v_type == VLNK));
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
/* Is this really needed ? */
zfs_sa_upgrade_txholds(tx, xzp);
dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
vput(ZTOV(xzp));
skipped += 1;
continue;
}
error = zfs_link_destroy(dzp, zap.za_name, xzp, tx, 0, NULL);
if (error)
skipped += 1;
dmu_tx_commit(tx);
vput(ZTOV(xzp));
}
zap_cursor_fini(&zc);
if (error != ENOENT)
skipped += 1;
return (skipped);
}
extern taskq_t *zfsvfs_taskq;
void
zfs_rmnode(znode_t *zp)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
objset_t *os = zfsvfs->z_os;
dmu_tx_t *tx;
uint64_t acl_obj;
uint64_t xattr_obj;
uint64_t count;
int error;
ASSERT(zp->z_links == 0);
if (zfsvfs->z_replay == B_FALSE)
ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
/*
* If this is an attribute directory, purge its contents.
*/
if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
(zp->z_pflags & ZFS_XATTR)) {
if (zfs_purgedir(zp) != 0) {
/*
* Not enough space to delete some xattrs.
* Leave it in the unlinked set.
*/
zfs_znode_dmu_fini(zp);
zfs_znode_free(zp);
return;
}
} else {
/*
* Free up all the data in the file. We don't do this for
* XATTR directories because we need truncate and remove to be
* in the same tx, like in zfs_znode_delete(). Otherwise, if
* we crash here we'll end up with an inconsistent truncated
* zap object in the delete queue. Note a truncated file is
* harmless since it only contains user data.
*/
error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
if (error) {
/*
* Not enough space or we were interrupted by unmount.
* Leave the file in the unlinked set.
*/
zfs_znode_dmu_fini(zp);
zfs_znode_free(zp);
return;
}
}
/*
* If the file has extended attributes, we're going to unlink
* the xattr dir.
*/
error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
&xattr_obj, sizeof (xattr_obj));
if (error)
xattr_obj = 0;
acl_obj = zfs_external_acl(zp);
/*
* Set up the final transaction.
*/
tx = dmu_tx_create(os);
dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
if (xattr_obj)
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
if (acl_obj)
dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
zfs_sa_upgrade_txholds(tx, zp);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
/*
* Not enough space to delete the file. Leave it in the
* unlinked set, leaking it until the fs is remounted (at
* which point we'll call zfs_unlinked_drain() to process it).
*/
dmu_tx_abort(tx);
zfs_znode_dmu_fini(zp);
zfs_znode_free(zp);
return;
}
/*
* FreeBSD's implemention of zfs_zget requires a vnode to back it.
* This means that we could end up calling into getnewvnode while
* calling zfs_rmnode as a result of a prior call to getnewvnode
* trying to clear vnodes out of the cache. If this repeats we can
* recurse enough that we overflow our stack. To avoid this, we
* avoid calling zfs_zget on the xattr znode and instead simply add
* it to the unlinked set and schedule a call to zfs_unlinked_drain.
*/
if (xattr_obj) {
/* Add extended attribute directory to the unlinked set. */
VERIFY3U(0, ==,
zap_add_int(os, zfsvfs->z_unlinkedobj, xattr_obj, tx));
}
mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
/* Remove this znode from the unlinked set */
VERIFY3U(0, ==,
zap_remove_int(os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) {
cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv);
}
mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
zfs_znode_delete(zp, tx);
dmu_tx_commit(tx);
if (xattr_obj) {
/*
* We're using the FreeBSD taskqueue API here instead of
* the Solaris taskq API since the FreeBSD API allows for a
* task to be enqueued multiple times but executed once.
*/
taskqueue_enqueue(zfsvfs_taskq->tq_queue,
&zfsvfs->z_unlinked_drain_task);
}
}
static uint64_t
zfs_dirent(znode_t *zp, uint64_t mode)
{
uint64_t de = zp->z_id;
if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE)
de |= IFTODT(mode) << 60;
return (de);
}
/*
* Link zp into dzp. Can only fail if zp has been unlinked.
*/
int
zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
int flag)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
vnode_t *vp = ZTOV(zp);
uint64_t value;
int zp_is_dir = (vp->v_type == VDIR);
sa_bulk_attr_t bulk[5];
uint64_t mtime[2], ctime[2];
int count = 0;
int error;
if (zfsvfs->z_replay == B_FALSE) {
ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
}
if (zp_is_dir) {
if (dzp->z_links >= ZFS_LINK_MAX)
return (SET_ERROR(EMLINK));
}
if (!(flag & ZRENAMING)) {
if (zp->z_unlinked) { /* no new links to unlinked zp */
ASSERT(!(flag & (ZNEW | ZEXISTS)));
return (SET_ERROR(ENOENT));
}
if (zp->z_links >= ZFS_LINK_MAX - zp_is_dir) {
return (SET_ERROR(EMLINK));
}
zp->z_links++;
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
&zp->z_links, sizeof (zp->z_links));
} else {
ASSERT(zp->z_unlinked == 0);
}
value = zfs_dirent(zp, zp->z_mode);
error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, name,
8, 1, &value, tx);
/*
* zap_add could fail to add the entry if it exceeds the capacity of the
* leaf-block and zap_leaf_split() failed to help.
* The caller of this routine is responsible for failing the transaction
* which will rollback the SA updates done above.
*/
if (error != 0) {
if (!(flag & ZRENAMING) && !(flag & ZNEW))
zp->z_links--;
return (error);
}
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
&dzp->z_id, sizeof (dzp->z_id));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
&zp->z_pflags, sizeof (zp->z_pflags));
if (!(flag & ZNEW)) {
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
ctime, sizeof (ctime));
zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
ctime);
}
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
ASSERT0(error);
dzp->z_size++;
dzp->z_links += zp_is_dir;
count = 0;
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
&dzp->z_size, sizeof (dzp->z_size));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
&dzp->z_links, sizeof (dzp->z_links));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
mtime, sizeof (mtime));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
ctime, sizeof (ctime));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
&dzp->z_pflags, sizeof (dzp->z_pflags));
zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
ASSERT0(error);
return (0);
}
/*
* The match type in the code for this function should conform to:
*
* ------------------------------------------------------------------------
* fs type | z_norm | lookup type | match type
* ---------|-------------|-------------|----------------------------------
* CS !norm | 0 | 0 | 0 (exact)
* CS norm | formX | 0 | MT_NORMALIZE
* CI !norm | upper | !ZCIEXACT | MT_NORMALIZE
* CI !norm | upper | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
* CI norm | upper|formX | !ZCIEXACT | MT_NORMALIZE
* CI norm | upper|formX | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
* CM !norm | upper | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
* CM !norm | upper | ZCILOOK | MT_NORMALIZE
* CM norm | upper|formX | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
* CM norm | upper|formX | ZCILOOK | MT_NORMALIZE
*
* Abbreviations:
* CS = Case Sensitive, CI = Case Insensitive, CM = Case Mixed
* upper = case folding set by fs type on creation (U8_TEXTPREP_TOUPPER)
* formX = unicode normalization form set on fs creation
*/
static int
zfs_dropname(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
int flag)
{
int error;
if (zp->z_zfsvfs->z_norm) {
matchtype_t mt = MT_NORMALIZE;
if (zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) {
mt |= MT_MATCH_CASE;
}
error = zap_remove_norm(zp->z_zfsvfs->z_os, dzp->z_id,
name, mt, tx);
} else {
error = zap_remove(zp->z_zfsvfs->z_os, dzp->z_id, name, tx);
}
return (error);
}
/*
* Unlink zp from dzp, and mark zp for deletion if this was the last link.
* Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
* If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
* If it's non-NULL, we use it to indicate whether the znode needs deletion,
* and it's the caller's job to do it.
*/
int
zfs_link_destroy(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
int flag, boolean_t *unlinkedp)
{
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
vnode_t *vp = ZTOV(zp);
int zp_is_dir = (vp->v_type == VDIR);
boolean_t unlinked = B_FALSE;
sa_bulk_attr_t bulk[5];
uint64_t mtime[2], ctime[2];
int count = 0;
int error;
if (zfsvfs->z_replay == B_FALSE) {
ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
}
if (!(flag & ZRENAMING)) {
if (zp_is_dir && !zfs_dirempty(zp))
return (SET_ERROR(ENOTEMPTY));
/*
* If we get here, we are going to try to remove the object.
* First try removing the name from the directory; if that
* fails, return the error.
*/
error = zfs_dropname(dzp, name, zp, tx, flag);
if (error != 0) {
return (error);
}
if (zp->z_links <= zp_is_dir) {
zfs_panic_recover("zfs: link count on vnode %p is %u, "
"should be at least %u", zp->z_vnode,
(int)zp->z_links,
zp_is_dir + 1);
zp->z_links = zp_is_dir + 1;
}
if (--zp->z_links == zp_is_dir) {
zp->z_unlinked = B_TRUE;
zp->z_links = 0;
unlinked = B_TRUE;
} else {
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
NULL, &ctime, sizeof (ctime));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
NULL, &zp->z_pflags, sizeof (zp->z_pflags));
zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
ctime);
}
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
NULL, &zp->z_links, sizeof (zp->z_links));
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
count = 0;
ASSERT0(error);
} else {
ASSERT(zp->z_unlinked == 0);
error = zfs_dropname(dzp, name, zp, tx, flag);
if (error != 0)
return (error);
}
dzp->z_size--; /* one dirent removed */
dzp->z_links -= zp_is_dir; /* ".." link from zp */
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
NULL, &dzp->z_links, sizeof (dzp->z_links));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
NULL, &dzp->z_size, sizeof (dzp->z_size));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
NULL, ctime, sizeof (ctime));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
NULL, mtime, sizeof (mtime));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
ASSERT0(error);
if (unlinkedp != NULL)
*unlinkedp = unlinked;
else if (unlinked)
zfs_unlinked_add(zp, tx);
return (0);
}
/*
* Indicate whether the directory is empty.
*/
boolean_t
zfs_dirempty(znode_t *dzp)
{
return (dzp->z_size == 2);
}
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xvpp, cred_t *cr)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
znode_t *xzp;
dmu_tx_t *tx;
int error;
zfs_acl_ids_t acl_ids;
boolean_t fuid_dirtied;
uint64_t parent __unused;
*xvpp = NULL;
if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
&acl_ids)) != 0)
return (error);
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, 0)) {
zfs_acl_ids_free(&acl_ids);
return (SET_ERROR(EDQUOT));
}
getnewvnode_reserve_();
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
ZFS_SA_BASE_ATTR_SIZE);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
fuid_dirtied = zfsvfs->z_fuid_dirty;
if (fuid_dirtied)
zfs_fuid_txhold(zfsvfs, tx);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
zfs_acl_ids_free(&acl_ids);
dmu_tx_abort(tx);
getnewvnode_drop_reserve();
return (error);
}
zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
if (fuid_dirtied)
zfs_fuid_sync(zfsvfs, tx);
#ifdef DEBUG
error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
&parent, sizeof (parent));
ASSERT(error == 0 && parent == zp->z_id);
#endif
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
sizeof (xzp->z_id), tx));
(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
xzp, "", NULL, acl_ids.z_fuidp, vap);
zfs_acl_ids_free(&acl_ids);
dmu_tx_commit(tx);
getnewvnode_drop_reserve();
*xvpp = xzp;
return (0);
}
/*
* Return a znode for the extended attribute directory for zp.
* ** If the directory does not already exist, it is created **
*
* IN: zp - znode to obtain attribute directory from
* cr - credentials of caller
* flags - flags from the VOP_LOOKUP call
*
* OUT: xzpp - pointer to extended attribute znode
*
* RETURN: 0 on success
* error number on failure
*/
int
zfs_get_xattrdir(znode_t *zp, znode_t **xzpp, cred_t *cr, int flags)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
znode_t *xzp;
vattr_t va;
int error;
top:
error = zfs_dirent_lookup(zp, "", &xzp, ZXATTR);
if (error)
return (error);
if (xzp != NULL) {
*xzpp = xzp;
return (0);
}
if (!(flags & CREATE_XATTR_DIR))
return (SET_ERROR(ENOATTR));
if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
return (SET_ERROR(EROFS));
}
/*
* The ability to 'create' files in an attribute
* directory comes from the write_xattr permission on the base file.
*
* The ability to 'search' an attribute directory requires
* read_xattr permission on the base file.
*
* Once in a directory the ability to read/write attributes
* is controlled by the permissions on the attribute file.
*/
va.va_mask = AT_MODE | AT_UID | AT_GID;
va.va_type = VDIR;
va.va_mode = S_IFDIR | S_ISVTX | 0777;
zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
error = zfs_make_xattrdir(zp, &va, xzpp, cr);
if (error == ERESTART) {
/* NB: we already did dmu_tx_wait() if necessary */
goto top;
}
if (error == 0)
VOP_UNLOCK1(ZTOV(*xzpp));
return (error);
}
/*
* Decide whether it is okay to remove within a sticky directory.
*
* In sticky directories, write access is not sufficient;
* you can remove entries from a directory only if:
*
* you own the directory,
* you own the entry,
* the entry is a plain file and you have write access,
* or you are privileged (checked in secpolicy...).
*
* The function returns 0 if remove access is granted.
*/
int
zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
{
uid_t uid;
uid_t downer;
uid_t fowner;
zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
if (zdp->z_zfsvfs->z_replay)
return (0);
if ((zdp->z_mode & S_ISVTX) == 0)
return (0);
downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER);
fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER);
if ((uid = crgetuid(cr)) == downer || uid == fowner ||
(ZTOV(zp)->v_type == VREG &&
zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0))
return (0);
else
return (secpolicy_vnode_remove(ZTOV(zp), cr));
}
+309
View File
@@ -0,0 +1,309 @@
/*
* Copyright (c) 2020 iXsystems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_recv.h>
#include <sys/dmu_tx.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
#include <sys/zio_checksum.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_file.h>
#include <sys/buf.h>
#include <sys/stat.h>
int
zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
{
struct thread *td;
int rc, fd;
td = curthread;
pwd_ensure_dirs();
/* 12.x doesn't take a const char * */
rc = kern_openat(td, AT_FDCWD, __DECONST(char *, path),
UIO_SYSSPACE, flags, mode);
if (rc)
return (SET_ERROR(rc));
fd = td->td_retval[0];
td->td_retval[0] = 0;
if (fget(curthread, fd, &cap_no_rights, fpp))
kern_close(td, fd);
return (0);
}
void
zfs_file_close(zfs_file_t *fp)
{
fo_close(fp, curthread);
}
static int
zfs_file_write_impl(zfs_file_t *fp, const void *buf, size_t count, loff_t *offp,
ssize_t *resid)
{
ssize_t rc;
struct uio auio;
struct thread *td;
struct iovec aiov;
td = curthread;
aiov.iov_base = (void *)(uintptr_t)buf;
aiov.iov_len = count;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_resid = count;
auio.uio_rw = UIO_WRITE;
auio.uio_td = td;
auio.uio_offset = *offp;
if ((fp->f_flag & FWRITE) == 0)
return (SET_ERROR(EBADF));
if (fp->f_type == DTYPE_VNODE)
bwillwrite();
rc = fo_write(fp, &auio, td->td_ucred, FOF_OFFSET, td);
if (rc)
return (SET_ERROR(rc));
if (resid)
*resid = auio.uio_resid;
else if (auio.uio_resid)
return (SET_ERROR(EIO));
*offp += count - auio.uio_resid;
return (rc);
}
int
zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
{
loff_t off = fp->f_offset;
ssize_t rc;
rc = zfs_file_write_impl(fp, buf, count, &off, resid);
if (rc == 0)
fp->f_offset = off;
return (SET_ERROR(rc));
}
int
zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
ssize_t *resid)
{
return (zfs_file_write_impl(fp, buf, count, &off, resid));
}
static int
zfs_file_read_impl(zfs_file_t *fp, void *buf, size_t count, loff_t *offp,
ssize_t *resid)
{
ssize_t rc;
struct uio auio;
struct thread *td;
struct iovec aiov;
td = curthread;
aiov.iov_base = (void *)(uintptr_t)buf;
aiov.iov_len = count;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_resid = count;
auio.uio_rw = UIO_READ;
auio.uio_td = td;
auio.uio_offset = *offp;
if ((fp->f_flag & FREAD) == 0)
return (SET_ERROR(EBADF));
rc = fo_read(fp, &auio, td->td_ucred, FOF_OFFSET, td);
if (rc)
return (SET_ERROR(rc));
*resid = auio.uio_resid;
*offp += count - auio.uio_resid;
return (SET_ERROR(0));
}
int
zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
{
loff_t off = fp->f_offset;
ssize_t rc;
rc = zfs_file_read_impl(fp, buf, count, &off, resid);
if (rc == 0)
fp->f_offset = off;
return (rc);
}
int
zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
ssize_t *resid)
{
return (zfs_file_read_impl(fp, buf, count, &off, resid));
}
int
zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
{
int rc;
struct thread *td;
td = curthread;
if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0)
return (SET_ERROR(ESPIPE));
rc = fo_seek(fp, *offp, whence, td);
if (rc == 0)
*offp = td->td_uretoff.tdu_off;
return (SET_ERROR(rc));
}
int
zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
{
struct thread *td;
struct stat sb;
int rc;
td = curthread;
rc = fo_stat(fp, &sb, td->td_ucred, td);
if (rc)
return (SET_ERROR(rc));
zfattr->zfa_size = sb.st_size;
zfattr->zfa_mode = sb.st_mode;
return (0);
}
static __inline int
zfs_vop_fsync(vnode_t *vp)
{
struct mount *mp;
int error;
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
goto drop;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
error = VOP_FSYNC(vp, MNT_WAIT, curthread);
VOP_UNLOCK1(vp);
vn_finished_write(mp);
drop:
return (SET_ERROR(error));
}
int
zfs_file_fsync(zfs_file_t *fp, int flags)
{
struct vnode *v;
if (fp->f_type != DTYPE_VNODE)
return (EINVAL);
v = fp->f_data;
return (zfs_vop_fsync(v));
}
int
zfs_file_get(int fd, zfs_file_t **fpp)
{
struct file *fp;
if (fget(curthread, fd, &cap_no_rights, &fp))
return (SET_ERROR(EBADF));
*fpp = fp;
return (0);
}
void
zfs_file_put(int fd)
{
struct file *fp;
/* No CAP_ rights required, as we're only releasing. */
if (fget(curthread, fd, &cap_no_rights, &fp) == 0) {
fdrop(fp, curthread);
fdrop(fp, curthread);
}
}
loff_t
zfs_file_off(zfs_file_t *fp)
{
return (fp->f_offset);
}
void *
zfs_file_private(zfs_file_t *fp)
{
file_t *tmpfp;
void *data;
int error;
tmpfp = curthread->td_fpop;
curthread->td_fpop = fp;
error = devfs_get_cdevpriv(&data);
curthread->td_fpop = tmpfp;
if (error != 0)
return (NULL);
return (data);
}
int
zfs_file_unlink(const char *fnamep)
{
enum uio_seg seg = UIO_SYSSPACE;
int rc;
#if __FreeBSD_version >= 1300018
rc = kern_funlinkat(curthread, AT_FDCWD, fnamep, FD_NONE, seg, 0, 0);
#else
#ifdef AT_BENEATH
rc = kern_unlinkat(curthread, AT_FDCWD, fnamep, seg, 0, 0);
#else
rc = kern_unlinkat(curthread, AT_FDCWD, __DECONST(char *, fnamep),
seg, 0);
#endif
#endif
return (SET_ERROR(rc));
}
+52
View File
@@ -0,0 +1,52 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/dmu.h>
#include <sys/avl.h>
#include <sys/zap.h>
#include <sys/refcount.h>
#include <sys/nvpair.h>
#ifdef _KERNEL
#include <sys/sid.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
#endif
#include <sys/zfs_fuid.h>
uint64_t
zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
cred_t *cr, zfs_fuid_info_t **fuidp)
{
uid_t id;
VERIFY(type == ZFS_OWNER || type == ZFS_GROUP);
id = (type == ZFS_OWNER) ? crgetuid(cr) : crgetgid(cr);
if (IS_EPHEMERAL(id))
return ((type == ZFS_OWNER) ? UID_NOBODY : GID_NOBODY);
return ((uint64_t)id);
}
+194
View File
@@ -0,0 +1,194 @@
/*
* Copyright (c) 2020 iXsystems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/uio.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/stat.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
#include <sys/zap.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_os.h>
#include <sys/vdev_impl.h>
#include <sys/dmu.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_redact.h>
#include <sys/dmu_tx.h>
#include <sys/sunddi.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/nvpair.h>
#include <sys/pathname.h>
#include <sys/sdt.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_onexit.h>
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
#include <sys/fm/util.h>
#include <sys/dsl_crypt.h>
#include <sys/dmu_recv.h>
#include <sys/dmu_send.h>
#include <sys/dmu_recv.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_bookmark.h>
#include <sys/dsl_userhold.h>
#include <sys/zfeature.h>
#include <sys/zcp.h>
#include <sys/zio_checksum.h>
#include <sys/vdev_removal.h>
#include <sys/vdev_trim.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_initialize.h>
#include <sys/zfs_ioctl_impl.h>
int
zfs_vfs_ref(zfsvfs_t **zfvp)
{
int error = 0;
if (*zfvp == NULL)
return (SET_ERROR(ESRCH));
error = vfs_busy((*zfvp)->z_vfs, 0);
if (error != 0) {
*zfvp = NULL;
error = SET_ERROR(ESRCH);
}
return (error);
}
int
zfs_vfs_held(zfsvfs_t *zfsvfs)
{
return (zfsvfs->z_vfs != NULL);
}
void
zfs_vfs_rele(zfsvfs_t *zfsvfs)
{
vfs_unbusy(zfsvfs->z_vfs);
}
static const zfs_ioc_key_t zfs_keys_nextboot[] = {
{"command", DATA_TYPE_STRING, 0},
{ ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, 0},
{ ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, 0}
};
static int
zfs_ioc_jail(zfs_cmd_t *zc)
{
return (zone_dataset_attach(curthread->td_ucred, zc->zc_name,
(int)zc->zc_zoneid));
}
static int
zfs_ioc_unjail(zfs_cmd_t *zc)
{
return (zone_dataset_detach(curthread->td_ucred, zc->zc_name,
(int)zc->zc_zoneid));
}
static int
zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
{
char name[MAXNAMELEN];
spa_t *spa;
vdev_t *vd;
char *command;
uint64_t pool_guid;
uint64_t vdev_guid;
int error;
if (nvlist_lookup_uint64(innvl,
ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
return (EINVAL);
if (nvlist_lookup_uint64(innvl,
ZPOOL_CONFIG_GUID, &vdev_guid) != 0)
return (EINVAL);
if (nvlist_lookup_string(innvl,
"command", &command) != 0)
return (EINVAL);
mutex_enter(&spa_namespace_lock);
spa = spa_by_guid(pool_guid, vdev_guid);
if (spa != NULL)
strcpy(name, spa_name(spa));
mutex_exit(&spa_namespace_lock);
if (spa == NULL)
return (ENOENT);
if ((error = spa_open(name, &spa, FTAG)) != 0)
return (error);
spa_vdev_state_enter(spa, SCL_ALL);
vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE);
if (vd == NULL) {
(void) spa_vdev_state_exit(spa, NULL, ENXIO);
spa_close(spa, FTAG);
return (ENODEV);
}
error = vdev_label_write_pad2(vd, command, strlen(command));
(void) spa_vdev_state_exit(spa, NULL, 0);
txg_wait_synced(spa->spa_dsl_pool, 0);
spa_close(spa, FTAG);
return (error);
}
void
zfs_ioctl_init_os(void)
{
zfs_ioctl_register_dataset_nolog(ZFS_IOC_JAIL, zfs_ioc_jail,
zfs_secpolicy_config, POOL_CHECK_NONE);
zfs_ioctl_register_dataset_nolog(ZFS_IOC_UNJAIL, zfs_ioc_unjail,
zfs_secpolicy_config, POOL_CHECK_NONE);
zfs_ioctl_register("fbsd_nextboot", ZFS_IOC_NEXTBOOT,
zfs_ioc_nextboot, zfs_secpolicy_config, NO_NAME,
POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_nextboot, 3);
}
+70
View File
@@ -0,0 +1,70 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/sunddi.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_onexit.h>
static int
zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo)
{
*zo = zfsdev_get_state(minor, ZST_ONEXIT);
if (*zo == NULL)
return (SET_ERROR(EBADF));
return (0);
}
int
zfs_onexit_fd_hold(int fd, minor_t *minorp)
{
file_t *fp, *tmpfp;
zfs_onexit_t *zo;
void *data;
int error;
if ((error = zfs_file_get(fd, &fp)))
return (error);
tmpfp = curthread->td_fpop;
curthread->td_fpop = fp;
error = devfs_get_cdevpriv(&data);
if (error == 0)
*minorp = (minor_t)(uintptr_t)data;
curthread->td_fpop = tmpfp;
if (error != 0)
return (SET_ERROR(EBADF));
return (zfs_onexit_minor_to_state(*minorp, &zo));
}
void
zfs_onexit_fd_rele(int fd)
{
zfs_file_put(fd);
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff