mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
Add FreeBSD support to OpenZFS
Add the FreeBSD platform code to the OpenZFS repository. As of this commit the source can be compiled and tested on FreeBSD 11 and 12. Subsequent commits are now required to compile on FreeBSD and Linux. Additionally, they must pass the ZFS Test Suite on FreeBSD which is being run by the CI. As of this commit 1230 tests pass on FreeBSD and there are no unexpected failures. Reviewed-by: Sean Eric Fagan <sef@ixsystems.com> Reviewed-by: Jorgen Lundman <lundman@lundman.net> Reviewed-by: Richard Laager <rlaager@wiktel.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Co-authored-by: Ryan Moeller <ryan@iXsystems.com> Signed-off-by: Matt Macy <mmacy@FreeBSD.org> Signed-off-by: Ryan Moeller <ryan@iXsystems.com> Closes #898 Closes #8987
This commit is contained in:
@@ -2,6 +2,8 @@
|
||||
*.ko.unsigned
|
||||
*.ko.out
|
||||
*.ko.out.sig
|
||||
*.ko.debug
|
||||
*.ko.full
|
||||
*.dwo
|
||||
.*.cmd
|
||||
.*.d
|
||||
@@ -11,5 +13,13 @@
|
||||
/.tmp_versions
|
||||
/Module.markers
|
||||
/Module.symvers
|
||||
/vnode_if*
|
||||
/bus_if.h
|
||||
/device_if.h
|
||||
/opt_global.h
|
||||
|
||||
/export_syms
|
||||
/machine
|
||||
/x86
|
||||
|
||||
!Makefile.in
|
||||
|
||||
@@ -0,0 +1,381 @@
|
||||
.if !defined(WITH_CTF)
|
||||
WITH_CTF=1
|
||||
.endif
|
||||
|
||||
.include <bsd.sys.mk>
|
||||
|
||||
SRCDIR= ${.CURDIR}
|
||||
INCDIR=${.CURDIR:H}/include
|
||||
|
||||
KMOD= openzfs
|
||||
|
||||
.PATH: ${SRCDIR}/avl \
|
||||
${SRCDIR}/lua \
|
||||
${SRCDIR}/nvpair \
|
||||
${SRCDIR}/os/freebsd/spl \
|
||||
${SRCDIR}/os/freebsd/zfs \
|
||||
${SRCDIR}/unicode \
|
||||
${SRCDIR}/zcommon \
|
||||
${SRCDIR}/zfs
|
||||
|
||||
|
||||
CFLAGS+= -I${INCDIR}
|
||||
CFLAGS+= -I${INCDIR}/spl
|
||||
CFLAGS+= -I${INCDIR}/os/freebsd
|
||||
CFLAGS+= -I${INCDIR}/os/freebsd/spl
|
||||
CFLAGS+= -I${INCDIR}/os/freebsd/zfs
|
||||
CFLAGS+= -include ${INCDIR}/os/freebsd/spl/sys/ccompile.h
|
||||
|
||||
CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS -D__BSD_VISIBLE=1
|
||||
CFLAGS+= -DHAVE_UIO_ZEROCOPY -DWITHOUT_NETDUMP -D__KERNEL -D_SYS_CONDVAR_H_
|
||||
CFLAGS+= -D_SYS_VMEM_H_ -D_MACHINE_ENDIAN_H_ -DKDTRACE_HOOKS -DSMP
|
||||
|
||||
.if ${MACHINE_ARCH} == "amd64"
|
||||
CFLAGS+= -DHAVE_AVX2 -DHAVE_AVX -D__x86_64 -DHAVE_SSE2 -DHAVE_AVX512F
|
||||
.endif
|
||||
|
||||
.if defined(WITH_DEBUG) && ${WITH_DEBUG} == "true"
|
||||
CFLAGS+= -DINVARIANTS -DWITNESS -g -O0 -DZFS_DEBUG -DOPENSOLARIS_WITNESS
|
||||
.else
|
||||
CFLAGS += -DNDEBUG
|
||||
.endif
|
||||
|
||||
.if defined(WITH_VFS_DEBUG) && ${WITH_VFS_DEBUG} == "true"
|
||||
# kernel must also be built with this option for this to work
|
||||
CFLAGS+= -DDEBUG_VFS_LOCKS
|
||||
.endif
|
||||
|
||||
.if defined(WITH_GCOV) && ${WITH_GCOV} == "true"
|
||||
CFLAGS+= -fprofile-arcs -ftest-coverage
|
||||
.endif
|
||||
|
||||
DEBUG_FLAGS=-g
|
||||
|
||||
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \
|
||||
${MACHINE_ARCH} == "arm"
|
||||
CFLAGS+= -DBITS_PER_LONG=32
|
||||
.else
|
||||
CFLAGS+= -DBITS_PER_LONG=64
|
||||
.endif
|
||||
|
||||
SRCS= vnode_if.h device_if.h bus_if.h
|
||||
|
||||
# avl
|
||||
SRCS+= avl.c
|
||||
|
||||
#lua
|
||||
SRCS+= lapi.c \
|
||||
lauxlib.c \
|
||||
lbaselib.c \
|
||||
lcode.c \
|
||||
lcompat.c \
|
||||
lcorolib.c \
|
||||
lctype.c \
|
||||
ldebug.c \
|
||||
ldo.c \
|
||||
lfunc.c \
|
||||
lgc.c \
|
||||
llex.c \
|
||||
lmem.c \
|
||||
lobject.c \
|
||||
lopcodes.c \
|
||||
lparser.c \
|
||||
lstate.c \
|
||||
lstring.c \
|
||||
lstrlib.c \
|
||||
ltable.c \
|
||||
ltablib.c \
|
||||
ltm.c \
|
||||
lvm.c \
|
||||
lzio.c
|
||||
|
||||
#nvpair
|
||||
SRCS+= nvpair.c \
|
||||
fnvpair.c \
|
||||
nvpair_alloc_spl.c \
|
||||
nvpair_alloc_fixed.c
|
||||
|
||||
#os/freebsd/spl
|
||||
SRCS+= acl_common.c \
|
||||
btree.c \
|
||||
callb.c \
|
||||
list.c \
|
||||
spl_acl.c \
|
||||
spl_cmn_err.c \
|
||||
spl_dtrace.c \
|
||||
spl_kmem.c \
|
||||
spl_kstat.c \
|
||||
spl_misc.c \
|
||||
spl_policy.c \
|
||||
spl_string.c \
|
||||
spl_sunddi.c \
|
||||
spl_sysevent.c \
|
||||
spl_taskq.c \
|
||||
spl_uio.c \
|
||||
spl_vfs.c \
|
||||
spl_vm.c \
|
||||
spl_zone.c \
|
||||
sha256c.c \
|
||||
sha512c.c \
|
||||
spl_procfs_list.c \
|
||||
spl_zlib.c
|
||||
|
||||
|
||||
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \
|
||||
${MACHINE_ARCH} == "arm"
|
||||
SRCS+= spl_atomic.c
|
||||
.endif
|
||||
|
||||
#os/freebsd/zfs
|
||||
SRCS+= abd.c \
|
||||
crypto_os.c \
|
||||
dmu_os.c \
|
||||
hkdf.c \
|
||||
kmod_core.c \
|
||||
spa_os.c \
|
||||
sysctl_os.c \
|
||||
vdev_file.c \
|
||||
vdev_label_os.c \
|
||||
vdev_geom.c \
|
||||
zfs_acl.c \
|
||||
zfs_ctldir.c \
|
||||
zfs_dir.c \
|
||||
zfs_ioctl_os.c \
|
||||
zfs_log.c \
|
||||
zfs_replay.c \
|
||||
zfs_vfsops.c \
|
||||
zfs_vnops.c \
|
||||
zfs_znode.c \
|
||||
zio_crypt.c \
|
||||
zvol_os.c
|
||||
|
||||
#unicode
|
||||
SRCS+= uconv.c \
|
||||
u8_textprep.c
|
||||
|
||||
#zcommon
|
||||
SRCS+= zfeature_common.c \
|
||||
zfs_comutil.c \
|
||||
zfs_deleg.c \
|
||||
zfs_fletcher.c \
|
||||
zfs_fletcher_avx512.c \
|
||||
zfs_fletcher_intel.c \
|
||||
zfs_fletcher_sse.c \
|
||||
zfs_fletcher_superscalar.c \
|
||||
zfs_fletcher_superscalar4.c \
|
||||
zfs_namecheck.c \
|
||||
zfs_prop.c \
|
||||
zpool_prop.c \
|
||||
zprop_common.c
|
||||
|
||||
#zfs
|
||||
SRCS+= aggsum.c \
|
||||
arc.c \
|
||||
arc_os.c \
|
||||
blkptr.c \
|
||||
bplist.c \
|
||||
bpobj.c \
|
||||
cityhash.c \
|
||||
dbuf.c \
|
||||
dbuf_stats.c \
|
||||
bptree.c \
|
||||
bqueue.c \
|
||||
dataset_kstats.c \
|
||||
ddt.c \
|
||||
ddt_zap.c \
|
||||
dmu.c \
|
||||
dmu_diff.c \
|
||||
dmu_object.c \
|
||||
dmu_objset.c \
|
||||
dmu_recv.c \
|
||||
dmu_redact.c \
|
||||
dmu_send.c \
|
||||
dmu_traverse.c \
|
||||
dmu_tx.c \
|
||||
dmu_zfetch.c \
|
||||
dnode.c \
|
||||
dnode_sync.c \
|
||||
dsl_dataset.c \
|
||||
dsl_deadlist.c \
|
||||
dsl_deleg.c \
|
||||
dsl_bookmark.c \
|
||||
dsl_dir.c \
|
||||
dsl_crypt.c \
|
||||
dsl_destroy.c \
|
||||
dsl_pool.c \
|
||||
dsl_prop.c \
|
||||
dsl_scan.c \
|
||||
dsl_synctask.c \
|
||||
dsl_userhold.c \
|
||||
fm.c \
|
||||
gzip.c \
|
||||
lzjb.c \
|
||||
lz4.c \
|
||||
metaslab.c \
|
||||
mmp.c \
|
||||
multilist.c \
|
||||
objlist.c \
|
||||
pathname.c \
|
||||
range_tree.c \
|
||||
refcount.c \
|
||||
rrwlock.c \
|
||||
sa.c \
|
||||
sha256.c \
|
||||
skein_zfs.c \
|
||||
spa.c \
|
||||
spa_boot.c \
|
||||
spa_checkpoint.c \
|
||||
spa_config.c \
|
||||
spa_errlog.c \
|
||||
spa_history.c \
|
||||
spa_log_spacemap.c \
|
||||
spa_misc.c \
|
||||
spa_stats.c \
|
||||
space_map.c \
|
||||
space_reftree.c \
|
||||
txg.c \
|
||||
uberblock.c \
|
||||
unique.c \
|
||||
vdev.c \
|
||||
vdev_cache.c \
|
||||
vdev_indirect.c \
|
||||
vdev_indirect_births.c \
|
||||
vdev_indirect_mapping.c \
|
||||
vdev_initialize.c \
|
||||
vdev_label.c \
|
||||
vdev_mirror.c \
|
||||
vdev_missing.c \
|
||||
vdev_queue.c \
|
||||
vdev_raidz.c \
|
||||
vdev_raidz_math.c \
|
||||
vdev_raidz_math_scalar.c \
|
||||
vdev_raidz_math_avx2.c \
|
||||
vdev_raidz_math_avx512bw.c \
|
||||
vdev_raidz_math_avx512f.c \
|
||||
vdev_raidz_math_sse2.c \
|
||||
vdev_raidz_math_ssse3.c \
|
||||
vdev_removal.c \
|
||||
vdev_root.c \
|
||||
vdev_trim.c \
|
||||
zap.c \
|
||||
zap_leaf.c \
|
||||
zap_micro.c \
|
||||
zcp.c \
|
||||
zcp_get.c \
|
||||
zcp_global.c \
|
||||
zcp_iter.c \
|
||||
zcp_set.c \
|
||||
zcp_synctask.c \
|
||||
zfeature.c \
|
||||
zfs_byteswap.c \
|
||||
zfs_debug.c \
|
||||
zfs_file_os.c \
|
||||
zfs_fm.c \
|
||||
zfs_fuid.c \
|
||||
zfs_fuid_os.c \
|
||||
zfs_ioctl.c \
|
||||
zfs_onexit.c \
|
||||
zfs_quota.c \
|
||||
zfs_ratelimit.c \
|
||||
zfs_rlock.c \
|
||||
zfs_sa.c \
|
||||
zil.c \
|
||||
zio.c \
|
||||
zio_checksum.c \
|
||||
zio_compress.c \
|
||||
zio_inject.c \
|
||||
zle.c \
|
||||
zrlock.c \
|
||||
zthr.c \
|
||||
zvol.c
|
||||
|
||||
beforeinstall:
|
||||
.if ${MK_DEBUG_FILES} != "no"
|
||||
mtree -eu \
|
||||
-f /etc/mtree/BSD.debug.dist \
|
||||
-p ${DESTDIR}/usr/lib
|
||||
.endif
|
||||
|
||||
.include <bsd.kmod.mk>
|
||||
|
||||
|
||||
CFLAGS.gcc+= -Wno-pointer-to-int-cast
|
||||
|
||||
CFLAGS.lapi.c= -Wno-cast-qual
|
||||
CFLAGS.lcompat.c= -Wno-cast-qual -Wno-missing-prototypes
|
||||
CFLAGS.lobject.c= -Wno-cast-qual
|
||||
CFLAGS.ltable.c= -Wno-cast-qual
|
||||
CFLAGS.lvm.c= -Wno-cast-qual
|
||||
CFLAGS.nvpair.c= -Wno-cast-qual
|
||||
CFLAGS.acl_common.c= -Wno-strict-prototypes -Wno-missing-prototypes
|
||||
CFLAGS.callb.c= -Wno-strict-prototypes -Wno-missing-prototypes
|
||||
CFLAGS.spl_kstat.c= -Wno-missing-prototypes
|
||||
CFLAGS.spl_string.c= -Wno-cast-qual
|
||||
CFLAGS.spl_vm.c= -Wno-cast-qual -Wno-missing-prototypes
|
||||
CFLAGS.spl_zlib.c= -Wno-cast-qual
|
||||
CFLAGS.abd.c= -Wno-cast-qual
|
||||
# Per-file flags are keyed on the source file name; the files listed in
# SRCS are dmu_os.c and kmod_core.c.
CFLAGS.dmu_os.c= -Wno-missing-prototypes
CFLAGS.kmod_core.c= -Wno-missing-prototypes
|
||||
CFLAGS.vdev_geom.c= -Wno-missing-prototypes
|
||||
CFLAGS.zfs_acl.c= -Wno-missing-prototypes
|
||||
CFLAGS.zfs_ctldir.c= -Wno-missing-prototypes -Wno-strict-prototypes
|
||||
CFLAGS.zfs_log.c= -Wno-cast-qual
|
||||
CFLAGS.zfs_vfsops.c= -Wno-missing-prototypes
|
||||
CFLAGS.zfs_vnops.c= -Wno-missing-prototypes -Wno-strict-prototypes -Wno-pointer-arith
|
||||
CFLAGS.zfs_znode.c= -Wno-missing-prototypes
|
||||
CFLAGS.zvol.c= -Wno-missing-prototypes
|
||||
CFLAGS.u8_textprep.c= -Wno-cast-qual
|
||||
CFLAGS.zfs_fletcher.c= -Wno-cast-qual -Wno-pointer-arith
|
||||
CFLAGS.zfs_fletcher_intel.c= -Wno-cast-qual -Wno-pointer-arith
|
||||
CFLAGS.zfs_fletcher_sse.c= -Wno-cast-qual -Wno-pointer-arith
|
||||
CFLAGS.zfs_fletcher_avx512.c= -Wno-cast-qual -Wno-pointer-arith
|
||||
CFLAGS.zprop_common.c= -Wno-cast-qual
|
||||
CFLAGS.arc.c= -Wno-missing-prototypes
|
||||
CFLAGS.blkptr.c= -Wno-missing-prototypes
|
||||
CFLAGS.dbuf.c= -Wno-missing-prototypes
|
||||
CFLAGS.dbuf_stats.c= -Wno-missing-prototypes
|
||||
CFLAGS.ddt.c= -Wno-missing-prototypes -Wno-cast-qual
|
||||
CFLAGS.dmu.c= -Wno-missing-prototypes -Wno-cast-qual
|
||||
CFLAGS.dmu_object.c= -Wno-missing-prototypes
|
||||
CFLAGS.dmu_objset.c= -Wno-missing-prototypes
|
||||
CFLAGS.dmu_traverse.c= -Wno-cast-qual
|
||||
CFLAGS.dsl_dir.c= -Wno-missing-prototypes -Wno-cast-qual
|
||||
CFLAGS.dsl_crypt.c= -Wno-missing-prototypes
|
||||
CFLAGS.dsl_deadlist.c= -Wno-cast-qual
|
||||
CFLAGS.dsl_pool.c= -Wno-missing-prototypes
|
||||
CFLAGS.dsl_prop.c= -Wno-cast-qual
|
||||
CFLAGS.dsl_scan.c= -Wno-missing-prototypes
|
||||
CFLAGS.fm.c= -Wno-cast-qual
|
||||
CFLAGS.gzip.c= -Wno-missing-prototypes
|
||||
CFLAGS.lzjb.c= -Wno-missing-prototypes
|
||||
CFLAGS.lz4.c= -Wno-missing-prototypes -Wno-cast-qual
|
||||
CFLAGS.metaslab.c= -Wno-missing-prototypes
|
||||
CFLAGS.sa.c= -Wno-missing-prototypes
|
||||
CFLAGS.sha256.c= -Wno-missing-prototypes
|
||||
CFLAGS.skein_zfs.c= -Wno-missing-prototypes
|
||||
CFLAGS.spa.c= -Wno-missing-prototypes -Wno-cast-qual
|
||||
CFLAGS.spa_boot.c= -Wno-missing-prototypes
|
||||
CFLAGS.spa_misc.c= -Wno-missing-prototypes -Wno-cast-qual
|
||||
CFLAGS.space_map.c= -Wno-missing-prototypes
|
||||
CFLAGS.vdev.c= -Wno-missing-prototypes
|
||||
CFLAGS.vdev_indirect.c= -Wno-missing-prototypes
|
||||
CFLAGS.vdev_label.c= -Wno-missing-prototypes
|
||||
CFLAGS.vdev_queue.c= -Wno-missing-prototypes
|
||||
CFLAGS.vdev_raidz.c= -Wno-cast-qual
|
||||
CFLAGS.vdev_raidz_math.c= -Wno-cast-qual
|
||||
CFLAGS.vdev_raidz_math_scalar.c= -Wno-cast-qual -Wno-missing-prototypes
|
||||
CFLAGS.vdev_raidz_math_avx2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
|
||||
CFLAGS.vdev_raidz_math_avx512f.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
|
||||
CFLAGS.vdev_raidz_math_sse2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
|
||||
CFLAGS.zap_leaf.c= -Wno-cast-qual
|
||||
CFLAGS.zap_micro.c= -Wno-missing-prototypes -Wno-cast-qual
|
||||
CFLAGS.zcp.c= -Wno-cast-qual
|
||||
CFLAGS.zcp_get.c= -Wno-missing-prototypes
|
||||
CFLAGS.zfs_debug.c= -Wno-missing-prototypes
|
||||
CFLAGS.zfs_fm.c= -Wno-cast-qual
|
||||
CFLAGS.zfs_ioctl.c= -Wno-missing-prototypes -Wno-cast-qual
|
||||
CFLAGS.zil.c= -Wno-missing-prototypes -Wno-cast-qual
|
||||
CFLAGS.zio.c= -Wno-missing-prototypes -Wno-cast-qual
|
||||
CFLAGS.zio_checksum.c= -Wno-missing-prototypes
|
||||
CFLAGS.zle.c= -Wno-missing-prototypes
|
||||
CFLAGS.zrlock.c= -Wno-missing-prototypes -Wno-cast-qual
|
||||
+23
-4
@@ -12,7 +12,7 @@ obj-m += os/linux/zfs/
|
||||
INSTALL_MOD_DIR ?= extra
|
||||
|
||||
ZFS_MODULE_CFLAGS += -std=gnu99 -Wno-declaration-after-statement
|
||||
ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@
|
||||
ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @NO_FORMAT_ZERO_LENGTH@
|
||||
ZFS_MODULE_CFLAGS += -include @abs_top_builddir@/zfs_config.h
|
||||
ZFS_MODULE_CFLAGS += -I@abs_top_srcdir@/include/os/linux/kernel
|
||||
ZFS_MODULE_CFLAGS += -I@abs_top_srcdir@/include/os/linux/spl
|
||||
@@ -40,6 +40,11 @@ modules-Linux:
|
||||
done
|
||||
$(MAKE) -C @LINUX_OBJ@ M=`pwd` @KERNEL_MAKE@ CONFIG_ZFS=m modules
|
||||
|
||||
# Only pass down gmake -j flag, if used.
|
||||
modules-FreeBSD:
|
||||
flags="$$(echo $$MAKEFLAGS | awk -v RS=' ' /^-j/)"; \
|
||||
env MAKEFLAGS="" make $${flags} -f Makefile.bsd
|
||||
|
||||
modules-unknown:
|
||||
@true
|
||||
|
||||
@@ -55,6 +60,10 @@ clean-Linux:
|
||||
|
||||
find . -name '*.ur-safe' -type f -print | xargs $(RM)
|
||||
|
||||
clean-FreeBSD:
|
||||
flags="$$(echo $$MAKEFLAGS | awk -v RS=' ' /^-j/)"; \
|
||||
env MAKEFLAGS="" make $${flags} -f Makefile.bsd clean
|
||||
|
||||
clean: clean-@ac_system@
|
||||
|
||||
modules_install-Linux:
|
||||
@@ -100,6 +109,11 @@ cscopelist-am: $(am__tagged_files)
|
||||
fi; \
|
||||
done >> $(top_builddir)/cscope.files
|
||||
|
||||
modules_install-FreeBSD:
|
||||
@# Install the kernel modules
|
||||
flags="$$(echo $$MAKEFLAGS | awk -v RS=' ' /^-j/)"; \
|
||||
env MAKEFLAGS="" make $${flags} -f Makefile.bsd install
|
||||
|
||||
modules_install: modules_install-@ac_system@
|
||||
|
||||
modules_uninstall-Linux:
|
||||
@@ -109,11 +123,16 @@ modules_uninstall-Linux:
|
||||
$(RM) -R $$kmoddir/$(INSTALL_MOD_DIR)/$$objdir; \
|
||||
done
|
||||
|
||||
modules_uninstall-FreeBSD:
|
||||
@false
|
||||
|
||||
modules_uninstall: modules_uninstall-@ac_system@
|
||||
|
||||
distdir:
|
||||
list='$(obj-m)'; for objdir in $$list; do \
|
||||
(cd @top_srcdir@/module && find $$objdir \
|
||||
-name '*.c' -o -name '*.h' -o -name '*.S' | \
|
||||
xargs -r cp --parents -t @abs_top_builddir@/module/$$distdir); \
|
||||
(cd @top_srcdir@/module && find $$objdir -name '*.[chS]' | \
|
||||
while read path; do \
|
||||
mkdir -p @abs_top_builddir@/module/$$distdir/$${path%/*}; \
|
||||
cp $$path @abs_top_builddir@/module/$$distdir/$$path; \
|
||||
done); \
|
||||
done
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,372 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/condvar.h>
|
||||
#include <sys/callb.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/cmn_err.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/kobj.h>
|
||||
#include <sys/systm.h> /* for delay() */
|
||||
#include <sys/taskq.h> /* For TASKQ_NAMELEN */
|
||||
#include <sys/kernel.h>
|
||||
|
||||
#define CB_MAXNAME TASKQ_NAMELEN
|
||||
|
||||
/*
|
||||
* The callb mechanism provides generic event scheduling/echoing.
|
||||
* A callb function is registered and called on behalf of the event.
|
||||
*/
|
||||
typedef struct callb {
|
||||
struct callb *c_next; /* next in class or on freelist */
|
||||
kthread_id_t c_thread; /* ptr to caller's thread struct */
|
||||
char c_flag; /* info about the callb state */
|
||||
uchar_t c_class; /* this callb's class */
|
||||
kcondvar_t c_done_cv; /* signal callb completion */
|
||||
boolean_t (*c_func)(); /* cb function: returns true if ok */
|
||||
void *c_arg; /* arg to c_func */
|
||||
char c_name[CB_MAXNAME+1]; /* debug:max func name length */
|
||||
} callb_t;
|
||||
|
||||
/*
|
||||
* callb c_flag bitmap definitions
|
||||
*/
|
||||
#define CALLB_FREE 0x0
|
||||
#define CALLB_TAKEN 0x1
|
||||
#define CALLB_EXECUTING 0x2
|
||||
|
||||
/*
|
||||
* Basic structure for a callb table.
|
||||
* All callbs are organized into different class groups described
|
||||
* by ct_class array.
|
||||
* The callbs within a class are single-linked and normally run by a
|
||||
* serial execution.
|
||||
*/
|
||||
typedef struct callb_table {
|
||||
kmutex_t ct_lock; /* protect all callb states */
|
||||
callb_t *ct_freelist; /* free callb structures */
|
||||
int ct_busy; /* != 0 prevents additions */
|
||||
kcondvar_t ct_busy_cv; /* to wait for not busy */
|
||||
int ct_ncallb; /* num of callbs allocated */
|
||||
callb_t *ct_first_cb[NCBCLASS]; /* ptr to 1st callb in a class */
|
||||
} callb_table_t;
|
||||
|
||||
int callb_timeout_sec = CPR_KTHREAD_TIMEOUT_SEC;
|
||||
|
||||
static callb_id_t callb_add_common(boolean_t (*)(void *, int),
|
||||
void *, int, char *, kthread_id_t);
|
||||
|
||||
static callb_table_t callb_table; /* system level callback table */
|
||||
static callb_table_t *ct = &callb_table;
|
||||
static kmutex_t callb_safe_mutex;
|
||||
callb_cpr_t callb_cprinfo_safe = {
|
||||
&callb_safe_mutex, CALLB_CPR_ALWAYS_SAFE, 0, {0, 0} };
|
||||
|
||||
/*
|
||||
* Init all callb tables in the system.
|
||||
*/
|
||||
void
|
||||
callb_init(void *dummy __unused)
|
||||
{
|
||||
callb_table.ct_busy = 0; /* mark table open for additions */
|
||||
mutex_init(&callb_safe_mutex, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&callb_table.ct_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
}
|
||||
|
||||
void
|
||||
callb_fini(void *dummy __unused)
|
||||
{
|
||||
callb_t *cp;
|
||||
int i;
|
||||
|
||||
mutex_enter(&ct->ct_lock);
|
||||
for (i = 0; i < 16; i++) {
|
||||
while ((cp = ct->ct_freelist) != NULL) {
|
||||
ct->ct_freelist = cp->c_next;
|
||||
ct->ct_ncallb--;
|
||||
kmem_free(cp, sizeof (callb_t));
|
||||
}
|
||||
if (ct->ct_ncallb == 0)
|
||||
break;
|
||||
/* Not all callbacks finished, waiting for the rest. */
|
||||
mutex_exit(&ct->ct_lock);
|
||||
tsleep(ct, 0, "callb", hz / 4);
|
||||
mutex_enter(&ct->ct_lock);
|
||||
}
|
||||
if (ct->ct_ncallb > 0)
|
||||
printf("%s: Leaked %d callbacks!\n", __func__, ct->ct_ncallb);
|
||||
mutex_exit(&ct->ct_lock);
|
||||
mutex_destroy(&callb_safe_mutex);
|
||||
mutex_destroy(&callb_table.ct_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* callout_add() is called to register func() be called later.
|
||||
*/
|
||||
static callb_id_t
|
||||
callb_add_common(boolean_t (*func)(void *arg, int code),
|
||||
void *arg, int class, char *name, kthread_id_t t)
|
||||
{
|
||||
callb_t *cp;
|
||||
|
||||
ASSERT(class < NCBCLASS);
|
||||
|
||||
mutex_enter(&ct->ct_lock);
|
||||
while (ct->ct_busy)
|
||||
cv_wait(&ct->ct_busy_cv, &ct->ct_lock);
|
||||
if ((cp = ct->ct_freelist) == NULL) {
|
||||
ct->ct_ncallb++;
|
||||
cp = (callb_t *)kmem_zalloc(sizeof (callb_t), KM_SLEEP);
|
||||
}
|
||||
ct->ct_freelist = cp->c_next;
|
||||
cp->c_thread = t;
|
||||
cp->c_func = func;
|
||||
cp->c_arg = arg;
|
||||
cp->c_class = (uchar_t)class;
|
||||
cp->c_flag |= CALLB_TAKEN;
|
||||
#ifdef DEBUG
|
||||
if (strlen(name) > CB_MAXNAME)
|
||||
cmn_err(CE_WARN, "callb_add: name of callback function '%s' "
|
||||
"too long -- truncated to %d chars",
|
||||
name, CB_MAXNAME);
|
||||
#endif
|
||||
(void) strncpy(cp->c_name, name, CB_MAXNAME);
|
||||
cp->c_name[CB_MAXNAME] = '\0';
|
||||
|
||||
/*
|
||||
* Insert the new callb at the head of its class list.
|
||||
*/
|
||||
cp->c_next = ct->ct_first_cb[class];
|
||||
ct->ct_first_cb[class] = cp;
|
||||
|
||||
mutex_exit(&ct->ct_lock);
|
||||
return ((callb_id_t)cp);
|
||||
}
|
||||
|
||||
/*
|
||||
* The default function to add an entry to the callback table. Since
|
||||
* it uses curthread as the thread identifier to store in the table,
|
||||
* it should be used for the normal case of a thread which is calling
|
||||
* to add ITSELF to the table.
|
||||
*/
|
||||
callb_id_t
|
||||
callb_add(boolean_t (*func)(void *arg, int code),
|
||||
void *arg, int class, char *name)
|
||||
{
|
||||
return (callb_add_common(func, arg, class, name, curthread));
|
||||
}
|
||||
|
||||
/*
|
||||
* A special version of callb_add() above for use by threads which
|
||||
* might be adding an entry to the table on behalf of some other
|
||||
* thread (for example, one which is constructed but not yet running).
|
||||
* In this version the thread id is an argument.
|
||||
*/
|
||||
callb_id_t
|
||||
callb_add_thread(boolean_t (*func)(void *arg, int code),
|
||||
void *arg, int class, char *name, kthread_id_t t)
|
||||
{
|
||||
return (callb_add_common(func, arg, class, name, t));
|
||||
}
|
||||
|
||||
/*
|
||||
* callout_delete() is called to remove an entry identified by id
|
||||
* that was originally placed there by a call to callout_add().
|
||||
* return -1 if fail to delete a callb entry otherwise return 0.
|
||||
*/
|
||||
int
|
||||
callb_delete(callb_id_t id)
|
||||
{
|
||||
callb_t **pp;
|
||||
callb_t *me = (callb_t *)id;
|
||||
|
||||
mutex_enter(&ct->ct_lock);
|
||||
|
||||
for (;;) {
|
||||
pp = &ct->ct_first_cb[me->c_class];
|
||||
while (*pp != NULL && *pp != me)
|
||||
pp = &(*pp)->c_next;
|
||||
|
||||
#ifdef DEBUG
|
||||
if (*pp != me) {
|
||||
cmn_err(CE_WARN, "callb delete bogus entry 0x%p",
|
||||
(void *)me);
|
||||
mutex_exit(&ct->ct_lock);
|
||||
return (-1);
|
||||
}
|
||||
#endif /* DEBUG */
|
||||
|
||||
/*
|
||||
* It is not allowed to delete a callb in the middle of
|
||||
* executing otherwise, the callb_execute() will be confused.
|
||||
*/
|
||||
if (!(me->c_flag & CALLB_EXECUTING))
|
||||
break;
|
||||
|
||||
cv_wait(&me->c_done_cv, &ct->ct_lock);
|
||||
}
|
||||
/* relink the class list */
|
||||
*pp = me->c_next;
|
||||
|
||||
/* clean up myself and return the free callb to the head of freelist */
|
||||
me->c_flag = CALLB_FREE;
|
||||
me->c_next = ct->ct_freelist;
|
||||
ct->ct_freelist = me;
|
||||
|
||||
mutex_exit(&ct->ct_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* class: indicates to execute all callbs in the same class;
|
||||
* code: optional argument for the callb functions.
|
||||
* return: = 0: success
|
||||
* != 0: ptr to string supplied when callback was registered
|
||||
*/
|
||||
void *
|
||||
callb_execute_class(int class, int code)
|
||||
{
|
||||
callb_t *cp;
|
||||
void *ret = NULL;
|
||||
|
||||
ASSERT(class < NCBCLASS);
|
||||
|
||||
mutex_enter(&ct->ct_lock);
|
||||
|
||||
for (cp = ct->ct_first_cb[class];
|
||||
cp != NULL && ret == 0; cp = cp->c_next) {
|
||||
while (cp->c_flag & CALLB_EXECUTING)
|
||||
cv_wait(&cp->c_done_cv, &ct->ct_lock);
|
||||
/*
|
||||
* cont if the callb is deleted while we're sleeping
|
||||
*/
|
||||
if (cp->c_flag == CALLB_FREE)
|
||||
continue;
|
||||
cp->c_flag |= CALLB_EXECUTING;
|
||||
|
||||
#ifdef CALLB_DEBUG
|
||||
printf("callb_execute: name=%s func=%p arg=%p\n",
|
||||
cp->c_name, (void *)cp->c_func, (void *)cp->c_arg);
|
||||
#endif /* CALLB_DEBUG */
|
||||
|
||||
mutex_exit(&ct->ct_lock);
|
||||
/* If callback function fails, pass back client's name */
|
||||
if (!(*cp->c_func)(cp->c_arg, code))
|
||||
ret = cp->c_name;
|
||||
mutex_enter(&ct->ct_lock);
|
||||
|
||||
cp->c_flag &= ~CALLB_EXECUTING;
|
||||
cv_broadcast(&cp->c_done_cv);
|
||||
}
|
||||
mutex_exit(&ct->ct_lock);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* callers make sure no recursive entries to this func.
|
||||
* dp->cc_lockp is registered by callb_add to protect callb_cpr_t structure.
|
||||
*
|
||||
* When calling to stop a kernel thread (code == CB_CODE_CPR_CHKPT) we
|
||||
* use a cv_timedwait() in case the kernel thread is blocked.
|
||||
*
|
||||
* Note that this is a generic callback handler for daemon CPR and
|
||||
* should NOT be changed to accommodate any specific requirement in a daemon.
|
||||
* Individual daemons that require changes to the handler shall write
|
||||
* callback routines in their own daemon modules.
|
||||
*/
|
||||
boolean_t
|
||||
callb_generic_cpr(void *arg, int code)
|
||||
{
|
||||
callb_cpr_t *cp = (callb_cpr_t *)arg;
|
||||
clock_t ret = 0; /* assume success */
|
||||
|
||||
mutex_enter(cp->cc_lockp);
|
||||
|
||||
switch (code) {
|
||||
case CB_CODE_CPR_CHKPT:
|
||||
cp->cc_events |= CALLB_CPR_START;
|
||||
#ifdef CPR_NOT_THREAD_SAFE
|
||||
while (!(cp->cc_events & CALLB_CPR_SAFE))
|
||||
/* cv_timedwait() returns -1 if it times out. */
|
||||
if ((ret = cv_reltimedwait(&cp->cc_callb_cv,
|
||||
cp->cc_lockp, (callb_timeout_sec * hz),
|
||||
TR_CLOCK_TICK)) == -1)
|
||||
break;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case CB_CODE_CPR_RESUME:
|
||||
cp->cc_events &= ~CALLB_CPR_START;
|
||||
cv_signal(&cp->cc_stop_cv);
|
||||
break;
|
||||
}
|
||||
mutex_exit(cp->cc_lockp);
|
||||
return (ret != -1);
|
||||
}
|
||||
|
||||
/*
|
||||
* The generic callback function associated with kernel threads which
|
||||
* are always considered safe.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
boolean_t
|
||||
callb_generic_cpr_safe(void *arg, int code)
|
||||
{
|
||||
return (B_TRUE);
|
||||
}
|
||||
/*
|
||||
* Prevent additions to callback table.
|
||||
*/
|
||||
void
|
||||
callb_lock_table(void)
|
||||
{
|
||||
mutex_enter(&ct->ct_lock);
|
||||
ASSERT(ct->ct_busy == 0);
|
||||
ct->ct_busy = 1;
|
||||
mutex_exit(&ct->ct_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow additions to callback table.
|
||||
*/
|
||||
void
|
||||
callb_unlock_table(void)
|
||||
{
|
||||
mutex_enter(&ct->ct_lock);
|
||||
ASSERT(ct->ct_busy != 0);
|
||||
ct->ct_busy = 0;
|
||||
cv_broadcast(&ct->ct_busy_cv);
|
||||
mutex_exit(&ct->ct_lock);
|
||||
}
|
||||
|
||||
SYSINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_init, NULL);
|
||||
SYSUNINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_fini, NULL);
|
||||
@@ -0,0 +1,245 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* Generic doubly-linked list implementation
|
||||
*/
|
||||
|
||||
#include <sys/list.h>
|
||||
#include <sys/list_impl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/debug.h>
|
||||
|
||||
/*
 * Conversions between an object and the list_node_t embedded in it at
 * (a)->list_offset bytes from the object's start.  Every argument is
 * parenthesized so that expression arguments (e.g. "base + 1") are
 * converted as a whole.
 */
#define	list_d2l(a, obj) ((list_node_t *)(((char *)(obj)) + (a)->list_offset))
#define	list_object(a, node) ((void *)(((char *)(node)) - (a)->list_offset))
#define	list_empty(a) ((a)->list_head.list_next == &(a)->list_head)

/*
 * The linking macros below expand to a single statement
 * (do { ... } while (0)) so they can be used safely as the body of an
 * unbraced if/else.
 */

/* Link object into the list immediately after node. */
#define	list_insert_after_node(list, node, object) do {	\
	list_node_t *lnew = list_d2l(list, object);	\
	lnew->list_prev = (node);			\
	lnew->list_next = (node)->list_next;		\
	(node)->list_next->list_prev = lnew;		\
	(node)->list_next = lnew;			\
} while (0)

/* Link object into the list immediately before node. */
#define	list_insert_before_node(list, node, object) do {	\
	list_node_t *lnew = list_d2l(list, object);	\
	lnew->list_next = (node);			\
	lnew->list_prev = (node)->list_prev;		\
	(node)->list_prev->list_next = lnew;		\
	(node)->list_prev = lnew;			\
} while (0)

/* Unlink node from its neighbours and clear its own links. */
#define	list_remove_node(node) do {			\
	(node)->list_prev->list_next = (node)->list_next;	\
	(node)->list_next->list_prev = (node)->list_prev;	\
	(node)->list_next = (node)->list_prev = NULL;	\
} while (0)
|
||||
|
||||
void
|
||||
list_create(list_t *list, size_t size, size_t offset)
|
||||
{
|
||||
ASSERT(list);
|
||||
ASSERT(size > 0);
|
||||
ASSERT(size >= offset + sizeof (list_node_t));
|
||||
|
||||
list->list_size = size;
|
||||
list->list_offset = offset;
|
||||
list->list_head.list_next = list->list_head.list_prev =
|
||||
&list->list_head;
|
||||
}
|
||||
|
||||
void
|
||||
list_destroy(list_t *list)
|
||||
{
|
||||
list_node_t *node = &list->list_head;
|
||||
|
||||
ASSERT(list);
|
||||
ASSERT(list->list_head.list_next == node);
|
||||
ASSERT(list->list_head.list_prev == node);
|
||||
|
||||
node->list_next = node->list_prev = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
list_insert_after(list_t *list, void *object, void *nobject)
|
||||
{
|
||||
if (object == NULL) {
|
||||
list_insert_head(list, nobject);
|
||||
} else {
|
||||
list_node_t *lold = list_d2l(list, object);
|
||||
list_insert_after_node(list, lold, nobject);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
list_insert_before(list_t *list, void *object, void *nobject)
|
||||
{
|
||||
if (object == NULL) {
|
||||
list_insert_tail(list, nobject);
|
||||
} else {
|
||||
list_node_t *lold = list_d2l(list, object);
|
||||
list_insert_before_node(list, lold, nobject);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
list_insert_head(list_t *list, void *object)
|
||||
{
|
||||
list_node_t *lold = &list->list_head;
|
||||
list_insert_after_node(list, lold, object);
|
||||
}
|
||||
|
||||
void
|
||||
list_insert_tail(list_t *list, void *object)
|
||||
{
|
||||
list_node_t *lold = &list->list_head;
|
||||
list_insert_before_node(list, lold, object);
|
||||
}
|
||||
|
||||
void
|
||||
list_remove(list_t *list, void *object)
|
||||
{
|
||||
list_node_t *lold = list_d2l(list, object);
|
||||
ASSERT(!list_empty(list));
|
||||
ASSERT(lold->list_next != NULL);
|
||||
list_remove_node(lold);
|
||||
}
|
||||
|
||||
void *
|
||||
list_remove_head(list_t *list)
|
||||
{
|
||||
list_node_t *head = list->list_head.list_next;
|
||||
if (head == &list->list_head)
|
||||
return (NULL);
|
||||
list_remove_node(head);
|
||||
return (list_object(list, head));
|
||||
}
|
||||
|
||||
void *
|
||||
list_remove_tail(list_t *list)
|
||||
{
|
||||
list_node_t *tail = list->list_head.list_prev;
|
||||
if (tail == &list->list_head)
|
||||
return (NULL);
|
||||
list_remove_node(tail);
|
||||
return (list_object(list, tail));
|
||||
}
|
||||
|
||||
void *
|
||||
list_head(list_t *list)
|
||||
{
|
||||
if (list_empty(list))
|
||||
return (NULL);
|
||||
return (list_object(list, list->list_head.list_next));
|
||||
}
|
||||
|
||||
void *
|
||||
list_tail(list_t *list)
|
||||
{
|
||||
if (list_empty(list))
|
||||
return (NULL);
|
||||
return (list_object(list, list->list_head.list_prev));
|
||||
}
|
||||
|
||||
void *
|
||||
list_next(list_t *list, void *object)
|
||||
{
|
||||
list_node_t *node = list_d2l(list, object);
|
||||
|
||||
if (node->list_next != &list->list_head)
|
||||
return (list_object(list, node->list_next));
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void *
|
||||
list_prev(list_t *list, void *object)
|
||||
{
|
||||
list_node_t *node = list_d2l(list, object);
|
||||
|
||||
if (node->list_prev != &list->list_head)
|
||||
return (list_object(list, node->list_prev));
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert src list after dst list. Empty src list thereafter.
|
||||
*/
|
||||
void
|
||||
list_move_tail(list_t *dst, list_t *src)
|
||||
{
|
||||
list_node_t *dstnode = &dst->list_head;
|
||||
list_node_t *srcnode = &src->list_head;
|
||||
|
||||
ASSERT(dst->list_size == src->list_size);
|
||||
ASSERT(dst->list_offset == src->list_offset);
|
||||
|
||||
if (list_empty(src))
|
||||
return;
|
||||
|
||||
dstnode->list_prev->list_next = srcnode->list_next;
|
||||
srcnode->list_next->list_prev = dstnode->list_prev;
|
||||
dstnode->list_prev = srcnode->list_prev;
|
||||
srcnode->list_prev->list_next = dstnode;
|
||||
|
||||
/* empty src list */
|
||||
srcnode->list_next = srcnode->list_prev = srcnode;
|
||||
}
|
||||
|
||||
void
|
||||
list_link_replace(list_node_t *lold, list_node_t *lnew)
|
||||
{
|
||||
ASSERT(list_link_active(lold));
|
||||
ASSERT(!list_link_active(lnew));
|
||||
|
||||
lnew->list_next = lold->list_next;
|
||||
lnew->list_prev = lold->list_prev;
|
||||
lold->list_prev->list_next = lnew;
|
||||
lold->list_next->list_prev = lnew;
|
||||
lold->list_next = lold->list_prev = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
list_link_init(list_node_t *link)
|
||||
{
|
||||
link->list_next = NULL;
|
||||
link->list_prev = NULL;
|
||||
}
|
||||
|
||||
int
|
||||
list_link_active(list_node_t *link)
|
||||
{
|
||||
return (link->list_next != NULL);
|
||||
}
|
||||
|
||||
int
|
||||
list_is_empty(list_t *list)
|
||||
{
|
||||
return (list_empty(list));
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright 2005 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _SHA224_H_
|
||||
#define _SHA224_H_
|
||||
|
||||
#ifndef _KERNEL
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#define SHA224_BLOCK_LENGTH 64
|
||||
#define SHA224_DIGEST_LENGTH 28
|
||||
#define SHA224_DIGEST_STRING_LENGTH (SHA224_DIGEST_LENGTH * 2 + 1)
|
||||
|
||||
/*
 * SHA-224 hashing context.
 *
 * The member types and order are the same as in SHA256_CTX; sha256c.c
 * casts a SHA224_CTX pointer to SHA256_CTX * to share the update and
 * padding code, so the two layouts must be kept in step.
 */
typedef struct SHA224Context {
	uint32_t state[8];	/* current hash state words */
	uint64_t count;		/* number of message bits processed so far */
	uint8_t buf[SHA224_BLOCK_LENGTH];	/* bytes of a not yet complete block */
} SHA224_CTX;
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/* Ensure libmd symbols do not clash with libcrypto */

/*
 * Each public SHA224_* name below is redirected to a _libmd_-prefixed
 * symbol, unless the includer has already defined that name as a macro.
 */
#ifndef SHA224_Init
#define SHA224_Init _libmd_SHA224_Init
#endif
#ifndef SHA224_Update
#define SHA224_Update _libmd_SHA224_Update
#endif
#ifndef SHA224_Final
#define SHA224_Final _libmd_SHA224_Final
#endif
#ifndef SHA224_End
#define SHA224_End _libmd_SHA224_End
#endif
#ifndef SHA224_Fd
#define SHA224_Fd _libmd_SHA224_Fd
#endif
#ifndef SHA224_FdChunk
#define SHA224_FdChunk _libmd_SHA224_FdChunk
#endif
#ifndef SHA224_File
#define SHA224_File _libmd_SHA224_File
#endif
#ifndef SHA224_FileChunk
#define SHA224_FileChunk _libmd_SHA224_FileChunk
#endif
#ifndef SHA224_Data
#define SHA224_Data _libmd_SHA224_Data
#endif

#ifndef SHA224_version
#define SHA224_version _libmd_SHA224_version
#endif

/* Begin a SHA-224 operation on the given context. */
void SHA224_Init(SHA224_CTX *);
/* Add the given number of bytes to the hash. */
void SHA224_Update(SHA224_CTX *, const void *, size_t);
/*
 * Finish the hash: the digest buffer must hold at least
 * SHA224_DIGEST_LENGTH bytes.
 */
void SHA224_Final(unsigned char [__min_size(SHA224_DIGEST_LENGTH)],
    SHA224_CTX *);
#ifndef _KERNEL
/* The helpers below are declared for non-kernel builds only. */
char *SHA224_End(SHA224_CTX *, char *);
char *SHA224_Data(const void *, unsigned int, char *);
char *SHA224_Fd(int, char *);
char *SHA224_FdChunk(int, char *, off_t, off_t);
char *SHA224_File(const char *, char *);
char *SHA224_FileChunk(const char *, char *, off_t, off_t);
#endif
|
||||
__END_DECLS
|
||||
|
||||
#endif /* !_SHA224_H_ */
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright 2005 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _SHA256_H_
|
||||
#define _SHA256_H_
|
||||
|
||||
#ifndef _KERNEL
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#define SHA256_BLOCK_LENGTH 64
|
||||
#define SHA256_DIGEST_LENGTH 32
|
||||
#define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1)
|
||||
|
||||
/*
 * SHA-256 hashing context.
 *
 * sha256c.c also uses this layout for SHA-224 by casting SHA224_CTX
 * pointers to SHA256_CTX *, so SHA224_CTX must be kept identical.
 */
typedef struct SHA256Context {
	uint32_t state[8];	/* current hash state words */
	uint64_t count;		/* number of message bits processed so far */
	uint8_t buf[SHA256_BLOCK_LENGTH];	/* bytes of a not yet complete block */
} SHA256_CTX;
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/* Ensure libmd symbols do not clash with libcrypto */

/*
 * Each public SHA256_* name below is redirected to a _libmd_-prefixed
 * symbol, unless the includer has already defined that name as a macro.
 */
#ifndef SHA256_Init
#define SHA256_Init _libmd_SHA256_Init
#endif
#ifndef SHA256_Update
#define SHA256_Update _libmd_SHA256_Update
#endif
#ifndef SHA256_Final
#define SHA256_Final _libmd_SHA256_Final
#endif
#ifndef SHA256_End
#define SHA256_End _libmd_SHA256_End
#endif
#ifndef SHA256_Fd
#define SHA256_Fd _libmd_SHA256_Fd
#endif
#ifndef SHA256_FdChunk
#define SHA256_FdChunk _libmd_SHA256_FdChunk
#endif
#ifndef SHA256_File
#define SHA256_File _libmd_SHA256_File
#endif
#ifndef SHA256_FileChunk
#define SHA256_FileChunk _libmd_SHA256_FileChunk
#endif
#ifndef SHA256_Data
#define SHA256_Data _libmd_SHA256_Data
#endif

#ifndef SHA256_Transform
#define SHA256_Transform _libmd_SHA256_Transform
#endif
#ifndef SHA256_version
#define SHA256_version _libmd_SHA256_version
#endif

/* Begin a SHA-256 operation on the given context. */
void SHA256_Init(SHA256_CTX *);
/* Add the given number of bytes to the hash. */
void SHA256_Update(SHA256_CTX *, const void *, size_t);
/*
 * Finish the hash: the digest buffer must hold at least
 * SHA256_DIGEST_LENGTH bytes.
 */
void SHA256_Final(unsigned char [__min_size(SHA256_DIGEST_LENGTH)],
    SHA256_CTX *);
#ifndef _KERNEL
/* The helpers below are declared for non-kernel builds only. */
char *SHA256_End(SHA256_CTX *, char *);
char *SHA256_Data(const void *, unsigned int, char *);
char *SHA256_Fd(int, char *);
char *SHA256_FdChunk(int, char *, off_t, off_t);
char *SHA256_File(const char *, char *);
char *SHA256_FileChunk(const char *, char *, off_t, off_t);
#endif
|
||||
__END_DECLS
|
||||
|
||||
#endif /* !_SHA256_H_ */
|
||||
@@ -0,0 +1,378 @@
|
||||
/*
|
||||
* Copyright 2005 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/systm.h>
|
||||
#else
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
|
||||
#include <sys/byteorder.h>
|
||||
#include <sys/endian.h>
|
||||
#include "sha224.h"
|
||||
#include "sha256.h"
|
||||
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
|
||||
/*
 * Copy a vector of big-endian uint32_t into a vector of bytes.
 * On a big-endian host the in-memory form already matches, so this is a
 * plain memcpy of len bytes.  Arguments are parenthesized so that an
 * expression such as "p + 1" is cast as a whole.
 */
#define	be32enc_vect(dst, src, len)	\
	memcpy((void *)(dst), (const void *)(src), (size_t)(len))

/*
 * Copy a vector of bytes into a vector of big-endian uint32_t.
 * Same reasoning as above: a plain memcpy of len bytes.
 */
#define	be32dec_vect(dst, src, len)	\
	memcpy((void *)(dst), (const void *)(src), (size_t)(len))
|
||||
|
||||
#else /* BYTE_ORDER != BIG_ENDIAN */
|
||||
|
||||
/*
 * Store len/4 32-bit words from src into dst as big-endian byte
 * quadruples.  len is a byte count and is assumed to be a multiple of 4.
 */
static void
be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
{
	size_t nwords = len / 4;
	size_t w;

	for (w = 0; w != nwords; w++)
		be32enc(&dst[4 * w], src[w]);
}
|
||||
|
||||
/*
 * Load len/4 32-bit words into dst from the big-endian byte string src.
 * len is a byte count and is assumed to be a multiple of 4.
 */
static void
be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
{
	size_t nwords = len / 4;
	size_t w;

	for (w = 0; w != nwords; w++)
		dst[w] = be32dec(&src[4 * w]);
}
|
||||
|
||||
#endif /* BYTE_ORDER != BIG_ENDIAN */
|
||||
|
||||
/*
 * SHA256 round constants.
 * One 32-bit constant per round; RNDr() adds K[i + ii] to the message
 * schedule word W[i + ii] of the same round.
 */
static const uint32_t K[64] = {
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
|
||||
|
||||
/*
 * Elementary functions used by SHA256.
 * All operands are expected to be 32-bit unsigned values.  Every macro
 * argument is parenthesized so that compound expressions passed as
 * arguments keep their intended grouping.
 */
#define	Ch(x, y, z)	(((x) & ((y) ^ (z))) ^ (z))
#define	Maj(x, y, z)	(((x) & ((y) | (z))) | ((y) & (z)))
#define	SHR(x, n)	((x) >> (n))
#define	ROTR(x, n)	(((x) >> (n)) | ((x) << (32 - (n))))
#define	S0(x)		(ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define	S1(x)		(ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define	s0(x)		(ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define	s1(x)		(ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))

/*
 * SHA256 round function.
 * Updates d and h in place from the eight working variables and the
 * per-round input k (schedule word plus round constant).  Wrapped in
 * do { } while (0) so the three statements always act as one statement,
 * including under an unbraced if/else.
 */
#define	RND(a, b, c, d, e, f, g, h, k)		\
	do {					\
		h += S1(e) + Ch(e, f, g) + (k);	\
		d += h;				\
		h += S0(a) + Maj(a, b, c);	\
	} while (0)
|
||||
|
||||
/*
 * Adjusted round function for rotating state.
 *
 * Instead of shuffling the eight working variables after every round,
 * the roles a..h are mapped onto S[] with an index that steps back by
 * one for each increment of i (mod 8).  The round input is the schedule
 * word and round constant at index i + ii.  SHA256_Transform() passes a
 * literal 0..15 as i and its loop counter as ii.
 */
#define RNDr(S, W, i, ii) \
	RND(S[(64 - i) % 8], S[(65 - i) % 8], \
	    S[(66 - i) % 8], S[(67 - i) % 8], \
	    S[(68 - i) % 8], S[(69 - i) % 8], \
	    S[(70 - i) % 8], S[(71 - i) % 8], \
	    W[i + ii] + K[i + ii])
|
||||
|
||||
/*
 * Message schedule computation.
 *
 * Fills W[i + ii + 16] from the words at offsets +14, +9, +1 and +0
 * relative to i + ii.  Note the parameter order is (W, ii, i), the
 * reverse of RNDr(); only the sum i + ii is ever used, so callers may
 * pass the literal and the loop counter in either position.
 */
#define MSCH(W, ii, i) \
	W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + \
	    s0(W[i + ii + 1]) + W[i + ii]
|
||||
|
||||
/*
 * SHA256 block compression function.  The 256-bit state is transformed via
 * the 512-bit input block to produce a new state.
 *
 * state: eight 32-bit state words, updated in place.
 * block: 64 input bytes, decoded as sixteen big-endian 32-bit words.
 */
static void
SHA256_Transform(uint32_t *state, const unsigned char block[64])
{
	uint32_t W[64];
	uint32_t S[8];
	int i;

	/* 1. Prepare the first part of the message schedule W. */
	be32dec_vect(W, block, 64);

	/* 2. Initialize working variables. */
	memcpy(S, state, 32);

	/* 3. Mix. */
	/* Each pass of the loop performs 16 rounds, starting at round i. */
	for (i = 0; i < 64; i += 16) {
		RNDr(S, W, 0, i);
		RNDr(S, W, 1, i);
		RNDr(S, W, 2, i);
		RNDr(S, W, 3, i);
		RNDr(S, W, 4, i);
		RNDr(S, W, 5, i);
		RNDr(S, W, 6, i);
		RNDr(S, W, 7, i);
		RNDr(S, W, 8, i);
		RNDr(S, W, 9, i);
		RNDr(S, W, 10, i);
		RNDr(S, W, 11, i);
		RNDr(S, W, 12, i);
		RNDr(S, W, 13, i);
		RNDr(S, W, 14, i);
		RNDr(S, W, 15, i);

		/*
		 * After the pass starting at round 48 all 64 rounds are
		 * done; stop before computing schedule words past W[63].
		 */
		if (i == 48)
			break;
		/* Extend the schedule with the 16 words the next pass needs. */
		MSCH(W, 0, i);
		MSCH(W, 1, i);
		MSCH(W, 2, i);
		MSCH(W, 3, i);
		MSCH(W, 4, i);
		MSCH(W, 5, i);
		MSCH(W, 6, i);
		MSCH(W, 7, i);
		MSCH(W, 8, i);
		MSCH(W, 9, i);
		MSCH(W, 10, i);
		MSCH(W, 11, i);
		MSCH(W, 12, i);
		MSCH(W, 13, i);
		MSCH(W, 14, i);
		MSCH(W, 15, i);
	}

	/* 4. Mix local working variables into global state */
	for (i = 0; i < 8; i++)
		state[i] += S[i];
}
|
||||
|
||||
/*
 * Padding source: a single 0x80 byte followed by zeroes.  SHA256_Pad()
 * copies a prefix of this table into the context buffer.  The table is
 * only ever read, so it is declared const.
 */
static const unsigned char PAD[64] = {
	0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
||||
|
||||
/* Add padding and terminating bit-count. */
|
||||
static void
|
||||
SHA256_Pad(SHA256_CTX * ctx)
|
||||
{
|
||||
size_t r;
|
||||
|
||||
/* Figure out how many bytes we have buffered. */
|
||||
r = (ctx->count >> 3) & 0x3f;
|
||||
|
||||
/* Pad to 56 mod 64, transforming if we finish a block en route. */
|
||||
if (r < 56) {
|
||||
/* Pad to 56 mod 64. */
|
||||
memcpy(&ctx->buf[r], PAD, 56 - r);
|
||||
} else {
|
||||
/* Finish the current block and mix. */
|
||||
memcpy(&ctx->buf[r], PAD, 64 - r);
|
||||
SHA256_Transform(ctx->state, ctx->buf);
|
||||
|
||||
/* The start of the final block is all zeroes. */
|
||||
memset(&ctx->buf[0], 0, 56);
|
||||
}
|
||||
|
||||
/* Add the terminating bit-count. */
|
||||
be64enc(&ctx->buf[56], ctx->count);
|
||||
|
||||
/* Mix in the final block. */
|
||||
SHA256_Transform(ctx->state, ctx->buf);
|
||||
}
|
||||
|
||||
/* SHA-256 initialization. Begins a SHA-256 operation. */
|
||||
void
|
||||
SHA256_Init(SHA256_CTX * ctx)
|
||||
{
|
||||
|
||||
/* Zero bits processed so far */
|
||||
ctx->count = 0;
|
||||
|
||||
/* Magic initialization constants */
|
||||
ctx->state[0] = 0x6A09E667;
|
||||
ctx->state[1] = 0xBB67AE85;
|
||||
ctx->state[2] = 0x3C6EF372;
|
||||
ctx->state[3] = 0xA54FF53A;
|
||||
ctx->state[4] = 0x510E527F;
|
||||
ctx->state[5] = 0x9B05688C;
|
||||
ctx->state[6] = 0x1F83D9AB;
|
||||
ctx->state[7] = 0x5BE0CD19;
|
||||
}
|
||||
|
||||
/* Add bytes into the hash */
|
||||
void
|
||||
SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len)
|
||||
{
|
||||
uint64_t bitlen;
|
||||
uint32_t r;
|
||||
const unsigned char *src = in;
|
||||
|
||||
/* Number of bytes left in the buffer from previous updates */
|
||||
r = (ctx->count >> 3) & 0x3f;
|
||||
|
||||
/* Convert the length into a number of bits */
|
||||
bitlen = len << 3;
|
||||
|
||||
/* Update number of bits */
|
||||
ctx->count += bitlen;
|
||||
|
||||
/* Handle the case where we don't need to perform any transforms */
|
||||
if (len < 64 - r) {
|
||||
memcpy(&ctx->buf[r], src, len);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Finish the current block */
|
||||
memcpy(&ctx->buf[r], src, 64 - r);
|
||||
SHA256_Transform(ctx->state, ctx->buf);
|
||||
src += 64 - r;
|
||||
len -= 64 - r;
|
||||
|
||||
/* Perform complete blocks */
|
||||
while (len >= 64) {
|
||||
SHA256_Transform(ctx->state, src);
|
||||
src += 64;
|
||||
len -= 64;
|
||||
}
|
||||
|
||||
/* Copy left over data into buffer */
|
||||
memcpy(ctx->buf, src, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* SHA-256 finalization. Pads the input data, exports the hash value,
|
||||
* and clears the context state.
|
||||
*/
|
||||
void
|
||||
SHA256_Final(unsigned char digest[static SHA256_DIGEST_LENGTH], SHA256_CTX *ctx)
|
||||
{
|
||||
|
||||
/* Add padding */
|
||||
SHA256_Pad(ctx);
|
||||
|
||||
/* Write the hash */
|
||||
be32enc_vect(digest, ctx->state, SHA256_DIGEST_LENGTH);
|
||||
|
||||
/* Clear the context state */
|
||||
explicit_bzero(ctx, sizeof (*ctx));
|
||||
}
|
||||
|
||||
/* SHA-224: ******************************************************* */
|
||||
/*
|
||||
* the SHA224 and SHA256 transforms are identical
|
||||
*/
|
||||
|
||||
/* SHA-224 initialization. Begins a SHA-224 operation. */
|
||||
void
|
||||
SHA224_Init(SHA224_CTX * ctx)
|
||||
{
|
||||
|
||||
/* Zero bits processed so far */
|
||||
ctx->count = 0;
|
||||
|
||||
/* Magic initialization constants */
|
||||
ctx->state[0] = 0xC1059ED8;
|
||||
ctx->state[1] = 0x367CD507;
|
||||
ctx->state[2] = 0x3070DD17;
|
||||
ctx->state[3] = 0xF70E5939;
|
||||
ctx->state[4] = 0xFFC00B31;
|
||||
ctx->state[5] = 0x68581511;
|
||||
ctx->state[6] = 0x64f98FA7;
|
||||
ctx->state[7] = 0xBEFA4FA4;
|
||||
}
|
||||
|
||||
/* Add bytes into the SHA-224 hash */
|
||||
void
|
||||
SHA224_Update(SHA224_CTX * ctx, const void *in, size_t len)
|
||||
{
|
||||
|
||||
SHA256_Update((SHA256_CTX *)ctx, in, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* SHA-224 finalization. Pads the input data, exports the hash value,
|
||||
* and clears the context state.
|
||||
*/
|
||||
void
|
||||
SHA224_Final(unsigned char digest[static SHA224_DIGEST_LENGTH], SHA224_CTX *ctx)
|
||||
{
|
||||
|
||||
/* Add padding */
|
||||
SHA256_Pad((SHA256_CTX *)ctx);
|
||||
|
||||
/* Write the hash */
|
||||
be32enc_vect(digest, ctx->state, SHA224_DIGEST_LENGTH);
|
||||
|
||||
/* Clear the context state */
|
||||
explicit_bzero(ctx, sizeof (*ctx));
|
||||
}
|
||||
|
||||
#ifdef WEAK_REFS
|
||||
/*
|
||||
* When building libmd, provide weak references. Note: this is not
|
||||
* activated in the context of compiling these sources for internal
|
||||
* use in libcrypt.
|
||||
*/
|
||||
#undef SHA256_Init
|
||||
__weak_reference(_libmd_SHA256_Init, SHA256_Init);
|
||||
#undef SHA256_Update
|
||||
__weak_reference(_libmd_SHA256_Update, SHA256_Update);
|
||||
#undef SHA256_Final
|
||||
__weak_reference(_libmd_SHA256_Final, SHA256_Final);
|
||||
#undef SHA256_Transform
|
||||
__weak_reference(_libmd_SHA256_Transform, SHA256_Transform);
|
||||
|
||||
#undef SHA224_Init
|
||||
__weak_reference(_libmd_SHA224_Init, SHA224_Init);
|
||||
#undef SHA224_Update
|
||||
__weak_reference(_libmd_SHA224_Update, SHA224_Update);
|
||||
#undef SHA224_Final
|
||||
__weak_reference(_libmd_SHA224_Final, SHA224_Final);
|
||||
#endif
|
||||
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright 2005 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _SHA384_H_
|
||||
#define _SHA384_H_
|
||||
|
||||
#ifndef _KERNEL
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#define SHA384_BLOCK_LENGTH 128
|
||||
#define SHA384_DIGEST_LENGTH 48
|
||||
#define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1)
|
||||
|
||||
/*
 * SHA-384 hashing context.  The member types and order are the same as
 * in SHA512_CTX (sha512.h).
 */
typedef struct SHA384Context {
	uint64_t state[8];	/* current hash state words */
	uint64_t count[2];	/* presumably a 128-bit bit count in two words — TODO confirm in sha512c.c */
	uint8_t buf[SHA384_BLOCK_LENGTH];	/* block-sized input buffer */
} SHA384_CTX;
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/* Ensure libmd symbols do not clash with libcrypto */
/*
 * Each public SHA384_* name below is redirected to a _libmd_-prefixed
 * symbol, unless the includer has already defined that name as a macro.
 */
#ifndef SHA384_Init
#define SHA384_Init _libmd_SHA384_Init
#endif
#ifndef SHA384_Update
#define SHA384_Update _libmd_SHA384_Update
#endif
#ifndef SHA384_Final
#define SHA384_Final _libmd_SHA384_Final
#endif
#ifndef SHA384_End
#define SHA384_End _libmd_SHA384_End
#endif
#ifndef SHA384_Fd
#define SHA384_Fd _libmd_SHA384_Fd
#endif
#ifndef SHA384_FdChunk
#define SHA384_FdChunk _libmd_SHA384_FdChunk
#endif
#ifndef SHA384_File
#define SHA384_File _libmd_SHA384_File
#endif
#ifndef SHA384_FileChunk
#define SHA384_FileChunk _libmd_SHA384_FileChunk
#endif
#ifndef SHA384_Data
#define SHA384_Data _libmd_SHA384_Data
#endif

#ifndef SHA384_version
#define SHA384_version _libmd_SHA384_version
#endif

/* Core streaming interface: set up a context, feed bytes, get the digest. */
void SHA384_Init(SHA384_CTX *);
void SHA384_Update(SHA384_CTX *, const void *, size_t);
/* The digest buffer must hold at least SHA384_DIGEST_LENGTH bytes. */
void SHA384_Final(unsigned char [__min_size(SHA384_DIGEST_LENGTH)],
    SHA384_CTX *);
#ifndef _KERNEL
/* The helpers below are declared for non-kernel builds only. */
char *SHA384_End(SHA384_CTX *, char *);
char *SHA384_Data(const void *, unsigned int, char *);
char *SHA384_Fd(int, char *);
char *SHA384_FdChunk(int, char *, off_t, off_t);
char *SHA384_File(const char *, char *);
char *SHA384_FileChunk(const char *, char *, off_t, off_t);
#endif
|
||||
|
||||
__END_DECLS
|
||||
|
||||
#endif /* !_SHA384_H_ */
|
||||
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright 2005 Colin Percival
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _SHA512_H_
|
||||
#define _SHA512_H_
|
||||
|
||||
#ifndef _KERNEL
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#define SHA512_BLOCK_LENGTH 128
|
||||
#define SHA512_DIGEST_LENGTH 64
|
||||
#define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1)
|
||||
|
||||
/*
 * SHA-512 hashing context.  The member types and order are the same as
 * in SHA384_CTX (sha384.h).
 */
typedef struct SHA512Context {
	uint64_t state[8];	/* current hash state words */
	uint64_t count[2];	/* presumably a 128-bit bit count in two words — TODO confirm in sha512c.c */
	uint8_t buf[SHA512_BLOCK_LENGTH];	/* block-sized input buffer */
} SHA512_CTX;
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/* Ensure libmd symbols do not clash with libcrypto */

/*
 * The renaming of SHA512_Init, SHA512_Update and SHA512_Final is
 * compiled out with "#if 0", so those three keep their plain names.
 * NOTE(review): presumably so they match symbols provided elsewhere —
 * confirm before re-enabling.
 */
#if 0
#ifndef SHA512_Init
#define SHA512_Init _libmd_SHA512_Init
#endif
#ifndef SHA512_Update
#define SHA512_Update _libmd_SHA512_Update
#endif
#ifndef SHA512_Final
#define SHA512_Final _libmd_SHA512_Final
#endif
#endif
/*
 * The remaining names are redirected to _libmd_-prefixed symbols unless
 * the includer has already defined them as macros.
 */
#ifndef SHA512_End
#define SHA512_End _libmd_SHA512_End
#endif
#ifndef SHA512_Fd
#define SHA512_Fd _libmd_SHA512_Fd
#endif
#ifndef SHA512_FdChunk
#define SHA512_FdChunk _libmd_SHA512_FdChunk
#endif
#ifndef SHA512_File
#define SHA512_File _libmd_SHA512_File
#endif
#ifndef SHA512_FileChunk
#define SHA512_FileChunk _libmd_SHA512_FileChunk
#endif
#ifndef SHA512_Data
#define SHA512_Data _libmd_SHA512_Data
#endif

#ifndef SHA512_Transform
#define SHA512_Transform _libmd_SHA512_Transform
#endif
#ifndef SHA512_version
#define SHA512_version _libmd_SHA512_version
#endif

/* Core streaming interface: set up a context, feed bytes, get the digest. */
void SHA512_Init(SHA512_CTX *);
void SHA512_Update(SHA512_CTX *, const void *, size_t);
/* The digest buffer must hold at least SHA512_DIGEST_LENGTH bytes. */
void SHA512_Final(unsigned char [__min_size(SHA512_DIGEST_LENGTH)],
    SHA512_CTX *);
#ifndef _KERNEL
/* The helpers below are declared for non-kernel builds only. */
char *SHA512_End(SHA512_CTX *, char *);
char *SHA512_Data(const void *, unsigned int, char *);
char *SHA512_Fd(int, char *);
char *SHA512_FdChunk(int, char *, off_t, off_t);
char *SHA512_File(const char *, char *);
char *SHA512_FileChunk(const char *, char *, off_t, off_t);
#endif
|
||||
|
||||
__END_DECLS
|
||||
|
||||
#endif /* !_SHA512_H_ */
|
||||
@@ -0,0 +1,508 @@
|
||||
/*
|
||||
* Copyright 2005 Colin Percival
|
||||
* Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/endian.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/systm.h>
|
||||
#else
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include "sha512.h"
|
||||
#include "sha512t.h"
|
||||
#include "sha384.h"
|
||||
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
|
||||
/*
 * Copy a vector of big-endian uint64_t into a vector of bytes.
 * On a big-endian host the in-memory form already matches, so this is a
 * plain memcpy of len bytes.  Arguments are parenthesized so that an
 * expression such as "p + 1" is cast as a whole.
 */
#define	be64enc_vect(dst, src, len)	\
	memcpy((void *)(dst), (const void *)(src), (size_t)(len))

/*
 * Copy a vector of bytes into a vector of big-endian uint64_t.
 * Same reasoning as above: a plain memcpy of len bytes.
 */
#define	be64dec_vect(dst, src, len)	\
	memcpy((void *)(dst), (const void *)(src), (size_t)(len))
|
||||
|
||||
#else /* BYTE_ORDER != BIG_ENDIAN */
|
||||
|
||||
/*
 * Store len/8 64-bit words from src into dst as big-endian groups of
 * eight bytes.  len is a byte count and is assumed to be a multiple of 8.
 */
static void
be64enc_vect(unsigned char *dst, const uint64_t *src, size_t len)
{
	size_t nwords = len / 8;
	size_t w;

	for (w = 0; w != nwords; w++)
		be64enc(&dst[8 * w], src[w]);
}
|
||||
|
||||
/*
 * Decode a big-endian length len vector of (unsigned char) into a length
 * len/8 vector of (uint64_t).  Assumes len is a multiple of 8; bytes past
 * the last whole word are not consumed.
 */
static void
be64dec_vect(uint64_t *dst, const unsigned char *src, size_t len)
{
	size_t nwords = len / 8;
	size_t w;

	for (w = 0; w != nwords; w++)
		dst[w] = be64dec(&src[w * 8]);
}
|
||||
|
||||
#endif /* BYTE_ORDER != BIG_ENDIAN */
|
||||
|
||||
/*
 * SHA512 round constants: one 64-bit word per round.  The number in the
 * left-hand comment is the index of the first constant on that line.
 */
static const uint64_t K[80] = {
	/*  0 */ 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
	/*  2 */ 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
	/*  4 */ 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
	/*  6 */ 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
	/*  8 */ 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
	/* 10 */ 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
	/* 12 */ 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
	/* 14 */ 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
	/* 16 */ 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
	/* 18 */ 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
	/* 20 */ 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
	/* 22 */ 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
	/* 24 */ 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
	/* 26 */ 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
	/* 28 */ 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
	/* 30 */ 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
	/* 32 */ 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
	/* 34 */ 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
	/* 36 */ 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
	/* 38 */ 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
	/* 40 */ 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
	/* 42 */ 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
	/* 44 */ 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
	/* 46 */ 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
	/* 48 */ 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
	/* 50 */ 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
	/* 52 */ 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
	/* 54 */ 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
	/* 56 */ 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
	/* 58 */ 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
	/* 60 */ 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
	/* 62 */ 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
	/* 64 */ 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
	/* 66 */ 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
	/* 68 */ 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
	/* 70 */ 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
	/* 72 */ 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
	/* 74 */ 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
	/* 76 */ 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
	/* 78 */ 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
};
|
||||
|
||||
/*
 * Elementary functions used by SHA512.  All of them operate on 64-bit
 * words.  Every argument is parenthesized so that an expression argument
 * keeps its grouping; arguments may still be evaluated more than once, so
 * they must be free of side effects.
 */
#define	Ch(x, y, z)	(((x) & ((y) ^ (z))) ^ (z))
#define	Maj(x, y, z)	(((x) & ((y) | (z))) | ((y) & (z)))
#define	SHR(x, n)	((x) >> (n))
/* Rotate right by n bits; n must satisfy 0 < n < 64. */
#define	ROTR(x, n)	(((x) >> (n)) | ((x) << (64 - (n))))
#define	S0(x)		(ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39))
#define	S1(x)		(ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41))
#define	s0(x)		(ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x, 7))
#define	s1(x)		(ROTR(x, 19) ^ ROTR(x, 61) ^ SHR(x, 6))
|
||||
|
||||
/*
 * SHA512 round function.  a..h name the eight working variables as seen
 * by this round and k is the value added in for the round (the caller
 * passes schedule word plus round constant).  Only d and h are modified.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement; the invocation supplies the terminating semicolon.
 */
#define	RND(a, b, c, d, e, f, g, h, k)			\
	do {						\
		(h) += S1(e) + Ch(e, f, g) + (k);	\
		(d) += (h);				\
		(h) += S0(a) + Maj(a, b, c);		\
	} while (0)
|
||||
|
||||
/*
 * Adjusted round function for rotating state.  Instead of shifting the
 * eight working variables after every round, round i addresses them at
 * rotated positions inside S.
 *
 * S:  array of the eight working variables.
 * W:  message schedule.
 * i:  round number within the current group of 16 (0..15).
 * ii: index of the first round of the current group.
 *
 * Arguments are parenthesized so expression arguments keep their grouping.
 */
#define	RNDr(S, W, i, ii)				\
	RND((S)[(80 - (i)) % 8], (S)[(81 - (i)) % 8],	\
	    (S)[(82 - (i)) % 8], (S)[(83 - (i)) % 8],	\
	    (S)[(84 - (i)) % 8], (S)[(85 - (i)) % 8],	\
	    (S)[(86 - (i)) % 8], (S)[(87 - (i)) % 8],	\
	    (W)[(i) + (ii)] + K[(i) + (ii)])
|
||||
|
||||
/*
 * Message schedule computation: derives schedule word W[i + ii + 16] from
 * the words 16, 15, 7 and 2 positions before it.  Arguments are
 * parenthesized so expression arguments keep their grouping.
 */
#define	MSCH(W, ii, i)						\
	((W)[(i) + (ii) + 16] =					\
	    s1((W)[(i) + (ii) + 14]) + (W)[(i) + (ii) + 9] +	\
	    s0((W)[(i) + (ii) + 1]) + (W)[(i) + (ii)])
|
||||
|
||||
/*
|
||||
* SHA512 block compression function. The 512-bit state is transformed via
|
||||
* the 512-bit input block to produce a new state.
|
||||
*/
|
||||
static void
|
||||
SHA512_Transform(uint64_t *state,
|
||||
const unsigned char block[SHA512_BLOCK_LENGTH])
|
||||
{
|
||||
uint64_t W[80];
|
||||
uint64_t S[8];
|
||||
int i;
|
||||
|
||||
/* 1. Prepare the first part of the message schedule W. */
|
||||
be64dec_vect(W, block, SHA512_BLOCK_LENGTH);
|
||||
|
||||
/* 2. Initialize working variables. */
|
||||
memcpy(S, state, SHA512_DIGEST_LENGTH);
|
||||
|
||||
/* 3. Mix. */
|
||||
for (i = 0; i < 80; i += 16) {
|
||||
RNDr(S, W, 0, i);
|
||||
RNDr(S, W, 1, i);
|
||||
RNDr(S, W, 2, i);
|
||||
RNDr(S, W, 3, i);
|
||||
RNDr(S, W, 4, i);
|
||||
RNDr(S, W, 5, i);
|
||||
RNDr(S, W, 6, i);
|
||||
RNDr(S, W, 7, i);
|
||||
RNDr(S, W, 8, i);
|
||||
RNDr(S, W, 9, i);
|
||||
RNDr(S, W, 10, i);
|
||||
RNDr(S, W, 11, i);
|
||||
RNDr(S, W, 12, i);
|
||||
RNDr(S, W, 13, i);
|
||||
RNDr(S, W, 14, i);
|
||||
RNDr(S, W, 15, i);
|
||||
|
||||
if (i == 64)
|
||||
break;
|
||||
MSCH(W, 0, i);
|
||||
MSCH(W, 1, i);
|
||||
MSCH(W, 2, i);
|
||||
MSCH(W, 3, i);
|
||||
MSCH(W, 4, i);
|
||||
MSCH(W, 5, i);
|
||||
MSCH(W, 6, i);
|
||||
MSCH(W, 7, i);
|
||||
MSCH(W, 8, i);
|
||||
MSCH(W, 9, i);
|
||||
MSCH(W, 10, i);
|
||||
MSCH(W, 11, i);
|
||||
MSCH(W, 12, i);
|
||||
MSCH(W, 13, i);
|
||||
MSCH(W, 14, i);
|
||||
MSCH(W, 15, i);
|
||||
}
|
||||
|
||||
/* 4. Mix local working variables into global state */
|
||||
for (i = 0; i < 8; i++)
|
||||
state[i] += S[i];
|
||||
}
|
||||
|
||||
/*
 * Padding source: one 0x80 byte followed by zeroes.  The table is only
 * ever read (it is the source of memcpy() calls), so it is declared const.
 * Elements without an explicit initializer are zero.
 */
static const unsigned char PAD[SHA512_BLOCK_LENGTH] = { 0x80 };
|
||||
|
||||
/* Add padding and terminating bit-count. */
|
||||
static void
|
||||
SHA512_Pad(SHA512_CTX * ctx)
|
||||
{
|
||||
size_t r;
|
||||
|
||||
/* Figure out how many bytes we have buffered. */
|
||||
r = (ctx->count[1] >> 3) & 0x7f;
|
||||
|
||||
/* Pad to 112 mod 128, transforming if we finish a block en route. */
|
||||
if (r < 112) {
|
||||
/* Pad to 112 mod 128. */
|
||||
memcpy(&ctx->buf[r], PAD, 112 - r);
|
||||
} else {
|
||||
/* Finish the current block and mix. */
|
||||
memcpy(&ctx->buf[r], PAD, 128 - r);
|
||||
SHA512_Transform(ctx->state, ctx->buf);
|
||||
|
||||
/* The start of the final block is all zeroes. */
|
||||
memset(&ctx->buf[0], 0, 112);
|
||||
}
|
||||
|
||||
/* Add the terminating bit-count. */
|
||||
be64enc_vect(&ctx->buf[112], ctx->count, 16);
|
||||
|
||||
/* Mix in the final block. */
|
||||
SHA512_Transform(ctx->state, ctx->buf);
|
||||
}
|
||||
|
||||
/* SHA-512 initialization. Begins a SHA-512 operation. */
|
||||
void
|
||||
SHA512_Init(SHA512_CTX * ctx)
|
||||
{
|
||||
|
||||
/* Zero bits processed so far */
|
||||
ctx->count[0] = ctx->count[1] = 0;
|
||||
|
||||
/* Magic initialization constants */
|
||||
ctx->state[0] = 0x6a09e667f3bcc908ULL;
|
||||
ctx->state[1] = 0xbb67ae8584caa73bULL;
|
||||
ctx->state[2] = 0x3c6ef372fe94f82bULL;
|
||||
ctx->state[3] = 0xa54ff53a5f1d36f1ULL;
|
||||
ctx->state[4] = 0x510e527fade682d1ULL;
|
||||
ctx->state[5] = 0x9b05688c2b3e6c1fULL;
|
||||
ctx->state[6] = 0x1f83d9abfb41bd6bULL;
|
||||
ctx->state[7] = 0x5be0cd19137e2179ULL;
|
||||
}
|
||||
|
||||
/* Add bytes into the hash */
|
||||
void
|
||||
SHA512_Update(SHA512_CTX * ctx, const void *in, size_t len)
|
||||
{
|
||||
uint64_t bitlen[2];
|
||||
uint64_t r;
|
||||
const unsigned char *src = in;
|
||||
|
||||
/* Number of bytes left in the buffer from previous updates */
|
||||
r = (ctx->count[1] >> 3) & 0x7f;
|
||||
|
||||
/* Convert the length into a number of bits */
|
||||
bitlen[1] = ((uint64_t)len) << 3;
|
||||
bitlen[0] = ((uint64_t)len) >> 61;
|
||||
|
||||
/* Update number of bits */
|
||||
if ((ctx->count[1] += bitlen[1]) < bitlen[1])
|
||||
ctx->count[0]++;
|
||||
ctx->count[0] += bitlen[0];
|
||||
|
||||
/* Handle the case where we don't need to perform any transforms */
|
||||
if (len < SHA512_BLOCK_LENGTH - r) {
|
||||
memcpy(&ctx->buf[r], src, len);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Finish the current block */
|
||||
memcpy(&ctx->buf[r], src, SHA512_BLOCK_LENGTH - r);
|
||||
SHA512_Transform(ctx->state, ctx->buf);
|
||||
src += SHA512_BLOCK_LENGTH - r;
|
||||
len -= SHA512_BLOCK_LENGTH - r;
|
||||
|
||||
/* Perform complete blocks */
|
||||
while (len >= SHA512_BLOCK_LENGTH) {
|
||||
SHA512_Transform(ctx->state, src);
|
||||
src += SHA512_BLOCK_LENGTH;
|
||||
len -= SHA512_BLOCK_LENGTH;
|
||||
}
|
||||
|
||||
/* Copy left over data into buffer */
|
||||
memcpy(ctx->buf, src, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* SHA-512 finalization. Pads the input data, exports the hash value,
|
||||
* and clears the context state.
|
||||
*/
|
||||
void
|
||||
SHA512_Final(unsigned char digest[static SHA512_DIGEST_LENGTH], SHA512_CTX *ctx)
|
||||
{
|
||||
|
||||
/* Add padding */
|
||||
SHA512_Pad(ctx);
|
||||
|
||||
/* Write the hash */
|
||||
be64enc_vect(digest, ctx->state, SHA512_DIGEST_LENGTH);
|
||||
|
||||
/* Clear the context state */
|
||||
explicit_bzero(ctx, sizeof (*ctx));
|
||||
}
|
||||
|
||||
/* SHA-512t: ******************************************************** */
|
||||
/*
|
||||
* the SHA512t transforms are identical to SHA512 so reuse the existing function
|
||||
*/
|
||||
void
|
||||
SHA512_224_Init(SHA512_CTX * ctx)
|
||||
{
|
||||
|
||||
/* Zero bits processed so far */
|
||||
ctx->count[0] = ctx->count[1] = 0;
|
||||
|
||||
/* Magic initialization constants */
|
||||
ctx->state[0] = 0x8c3d37c819544da2ULL;
|
||||
ctx->state[1] = 0x73e1996689dcd4d6ULL;
|
||||
ctx->state[2] = 0x1dfab7ae32ff9c82ULL;
|
||||
ctx->state[3] = 0x679dd514582f9fcfULL;
|
||||
ctx->state[4] = 0x0f6d2b697bd44da8ULL;
|
||||
ctx->state[5] = 0x77e36f7304c48942ULL;
|
||||
ctx->state[6] = 0x3f9d85a86a1d36c8ULL;
|
||||
ctx->state[7] = 0x1112e6ad91d692a1ULL;
|
||||
}
|
||||
|
||||
void
|
||||
SHA512_224_Update(SHA512_CTX * ctx, const void *in, size_t len)
|
||||
{
|
||||
|
||||
SHA512_Update(ctx, in, len);
|
||||
}
|
||||
|
||||
void
|
||||
SHA512_224_Final(unsigned char digest[static SHA512_224_DIGEST_LENGTH],
|
||||
SHA512_CTX *ctx)
|
||||
{
|
||||
|
||||
/* Add padding */
|
||||
SHA512_Pad(ctx);
|
||||
|
||||
/* Write the hash */
|
||||
be64enc_vect(digest, ctx->state, SHA512_224_DIGEST_LENGTH);
|
||||
|
||||
/* Clear the context state */
|
||||
explicit_bzero(ctx, sizeof (*ctx));
|
||||
}
|
||||
|
||||
void
|
||||
SHA512_256_Init(SHA512_CTX * ctx)
|
||||
{
|
||||
|
||||
/* Zero bits processed so far */
|
||||
ctx->count[0] = ctx->count[1] = 0;
|
||||
|
||||
/* Magic initialization constants */
|
||||
ctx->state[0] = 0x22312194fc2bf72cULL;
|
||||
ctx->state[1] = 0x9f555fa3c84c64c2ULL;
|
||||
ctx->state[2] = 0x2393b86b6f53b151ULL;
|
||||
ctx->state[3] = 0x963877195940eabdULL;
|
||||
ctx->state[4] = 0x96283ee2a88effe3ULL;
|
||||
ctx->state[5] = 0xbe5e1e2553863992ULL;
|
||||
ctx->state[6] = 0x2b0199fc2c85b8aaULL;
|
||||
ctx->state[7] = 0x0eb72ddc81c52ca2ULL;
|
||||
}
|
||||
|
||||
void
|
||||
SHA512_256_Update(SHA512_CTX * ctx, const void *in, size_t len)
|
||||
{
|
||||
|
||||
SHA512_Update(ctx, in, len);
|
||||
}
|
||||
|
||||
void
|
||||
SHA512_256_Final(unsigned char digest[static SHA512_256_DIGEST_LENGTH],
|
||||
SHA512_CTX * ctx)
|
||||
{
|
||||
|
||||
/* Add padding */
|
||||
SHA512_Pad(ctx);
|
||||
|
||||
/* Write the hash */
|
||||
be64enc_vect(digest, ctx->state, SHA512_256_DIGEST_LENGTH);
|
||||
|
||||
/* Clear the context state */
|
||||
explicit_bzero(ctx, sizeof (*ctx));
|
||||
}
|
||||
|
||||
/* ** SHA-384: ******************************************************** */
|
||||
/*
|
||||
 * The SHA-384 and SHA-512 transforms are identical, so no separate SHA-384
 * transform is provided; the SHA-384 functions below reuse the SHA-512 code.
|
||||
*/
|
||||
|
||||
/* SHA-384 initialization. Begins a SHA-384 operation. */
|
||||
void
|
||||
SHA384_Init(SHA384_CTX * ctx)
|
||||
{
|
||||
|
||||
/* Zero bits processed so far */
|
||||
ctx->count[0] = ctx->count[1] = 0;
|
||||
|
||||
/* Magic initialization constants */
|
||||
ctx->state[0] = 0xcbbb9d5dc1059ed8ULL;
|
||||
ctx->state[1] = 0x629a292a367cd507ULL;
|
||||
ctx->state[2] = 0x9159015a3070dd17ULL;
|
||||
ctx->state[3] = 0x152fecd8f70e5939ULL;
|
||||
ctx->state[4] = 0x67332667ffc00b31ULL;
|
||||
ctx->state[5] = 0x8eb44a8768581511ULL;
|
||||
ctx->state[6] = 0xdb0c2e0d64f98fa7ULL;
|
||||
ctx->state[7] = 0x47b5481dbefa4fa4ULL;
|
||||
}
|
||||
|
||||
/* Add bytes into the SHA-384 hash */
|
||||
void
|
||||
SHA384_Update(SHA384_CTX * ctx, const void *in, size_t len)
|
||||
{
|
||||
|
||||
SHA512_Update((SHA512_CTX *)ctx, in, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* SHA-384 finalization. Pads the input data, exports the hash value,
|
||||
* and clears the context state.
|
||||
*/
|
||||
void
|
||||
SHA384_Final(unsigned char digest[static SHA384_DIGEST_LENGTH], SHA384_CTX *ctx)
|
||||
{
|
||||
|
||||
/* Add padding */
|
||||
SHA512_Pad((SHA512_CTX *)ctx);
|
||||
|
||||
/* Write the hash */
|
||||
be64enc_vect(digest, ctx->state, SHA384_DIGEST_LENGTH);
|
||||
|
||||
/* Clear the context state */
|
||||
explicit_bzero(ctx, sizeof (*ctx));
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* When building libmd, provide weak references. Note: this is not
|
||||
* activated in the context of compiling these sources for internal
|
||||
* use in libcrypt.
|
||||
*/
|
||||
#undef SHA512_Init
|
||||
__weak_reference(_libmd_SHA512_Init, SHA512_Init);
|
||||
#undef SHA512_Update
|
||||
__weak_reference(_libmd_SHA512_Update, SHA512_Update);
|
||||
#undef SHA512_Final
|
||||
__weak_reference(_libmd_SHA512_Final, SHA512_Final);
|
||||
#undef SHA512_Transform
|
||||
__weak_reference(_libmd_SHA512_Transform, SHA512_Transform);
|
||||
|
||||
#undef SHA512_224_Init
|
||||
__weak_reference(_libmd_SHA512_224_Init, SHA512_224_Init);
|
||||
#undef SHA512_224_Update
|
||||
__weak_reference(_libmd_SHA512_224_Update, SHA512_224_Update);
|
||||
#undef SHA512_224_Final
|
||||
__weak_reference(_libmd_SHA512_224_Final, SHA512_224_Final);
|
||||
|
||||
#undef SHA512_256_Init
|
||||
__weak_reference(_libmd_SHA512_256_Init, SHA512_256_Init);
|
||||
#undef SHA512_256_Update
|
||||
__weak_reference(_libmd_SHA512_256_Update, SHA512_256_Update);
|
||||
#undef SHA512_256_Final
|
||||
__weak_reference(_libmd_SHA512_256_Final, SHA512_256_Final);
|
||||
|
||||
#undef SHA384_Init
|
||||
__weak_reference(_libmd_SHA384_Init, SHA384_Init);
|
||||
#undef SHA384_Update
|
||||
__weak_reference(_libmd_SHA384_Update, SHA384_Update);
|
||||
#undef SHA384_Final
|
||||
__weak_reference(_libmd_SHA384_Final, SHA384_Final);
|
||||
#endif
|
||||
@@ -0,0 +1,143 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _SHA512T_H_
|
||||
#define _SHA512T_H_
|
||||
|
||||
#include "sha512.h"
|
||||
|
||||
#ifndef _KERNEL
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
/*
 * Digest sizes in bytes: 28 bytes (224 bits) for SHA-512/224 and 32 bytes
 * (256 bits) for SHA-512/256.  Each *_DIGEST_STRING_LENGTH is twice the
 * digest length plus one -- presumably room for a hex rendering and its
 * terminating NUL; confirm against the *_End() implementations.
 */
#define SHA512_224_DIGEST_LENGTH 28
|
||||
#define SHA512_224_DIGEST_STRING_LENGTH (SHA512_224_DIGEST_LENGTH * 2 + 1)
|
||||
#define SHA512_256_DIGEST_LENGTH 32
|
||||
#define SHA512_256_DIGEST_STRING_LENGTH (SHA512_256_DIGEST_LENGTH * 2 + 1)
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/* Ensure libmd symbols do not clash with libcrypto */
|
||||
#ifndef SHA512_224_Init
|
||||
#define SHA512_224_Init _libmd_SHA512_224_Init
|
||||
#endif
|
||||
#ifndef SHA512_224_Update
|
||||
#define SHA512_224_Update _libmd_SHA512_224_Update
|
||||
#endif
|
||||
#ifndef SHA512_224_Final
|
||||
#define SHA512_224_Final _libmd_SHA512_224_Final
|
||||
#endif
|
||||
#ifndef SHA512_224_End
|
||||
#define SHA512_224_End _libmd_SHA512_224_End
|
||||
#endif
|
||||
#ifndef SHA512_224_Fd
|
||||
#define SHA512_224_Fd _libmd_SHA512_224_Fd
|
||||
#endif
|
||||
#ifndef SHA512_224_FdChunk
|
||||
#define SHA512_224_FdChunk _libmd_SHA512_224_FdChunk
|
||||
#endif
|
||||
#ifndef SHA512_224_File
|
||||
#define SHA512_224_File _libmd_SHA512_224_File
|
||||
#endif
|
||||
#ifndef SHA512_224_FileChunk
|
||||
#define SHA512_224_FileChunk _libmd_SHA512_224_FileChunk
|
||||
#endif
|
||||
#ifndef SHA512_224_Data
|
||||
#define SHA512_224_Data _libmd_SHA512_224_Data
|
||||
#endif
|
||||
|
||||
#ifndef SHA512_224_Transform
|
||||
#define SHA512_224_Transform _libmd_SHA512_224_Transform
|
||||
#endif
|
||||
#ifndef SHA512_224_version
|
||||
#define SHA512_224_version _libmd_SHA512_224_version
|
||||
#endif
|
||||
|
||||
#ifndef SHA512_256_Init
|
||||
#define SHA512_256_Init _libmd_SHA512_256_Init
|
||||
#endif
|
||||
#ifndef SHA512_256_Update
|
||||
#define SHA512_256_Update _libmd_SHA512_256_Update
|
||||
#endif
|
||||
#ifndef SHA512_256_Final
|
||||
#define SHA512_256_Final _libmd_SHA512_256_Final
|
||||
#endif
|
||||
#ifndef SHA512_256_End
|
||||
#define SHA512_256_End _libmd_SHA512_256_End
|
||||
#endif
|
||||
#ifndef SHA512_256_Fd
|
||||
#define SHA512_256_Fd _libmd_SHA512_256_Fd
|
||||
#endif
|
||||
#ifndef SHA512_256_FdChunk
|
||||
#define SHA512_256_FdChunk _libmd_SHA512_256_FdChunk
|
||||
#endif
|
||||
#ifndef SHA512_256_File
|
||||
#define SHA512_256_File _libmd_SHA512_256_File
|
||||
#endif
|
||||
#ifndef SHA512_256_FileChunk
|
||||
#define SHA512_256_FileChunk _libmd_SHA512_256_FileChunk
|
||||
#endif
|
||||
#ifndef SHA512_256_Data
|
||||
#define SHA512_256_Data _libmd_SHA512_256_Data
|
||||
#endif
|
||||
|
||||
#ifndef SHA512_256_Transform
|
||||
#define SHA512_256_Transform _libmd_SHA512_256_Transform
|
||||
#endif
|
||||
#ifndef SHA512_256_version
|
||||
#define SHA512_256_version _libmd_SHA512_256_version
|
||||
#endif
|
||||
|
||||
/*
 * SHA-512/224 incremental interface.  All three functions operate on a
 * SHA512_CTX; the digest buffer passed to Final is declared with a
 * minimum size of SHA512_224_DIGEST_LENGTH bytes.
 */
void SHA512_224_Init(SHA512_CTX *);
|
||||
void SHA512_224_Update(SHA512_CTX *, const void *, size_t);
|
||||
void SHA512_224_Final(unsigned char [__min_size(SHA512_224_DIGEST_LENGTH)],
|
||||
SHA512_CTX *);
|
||||
/*
 * Convenience helpers, declared for non-kernel builds only.  Their
 * implementations are not part of this header; NOTE(review): presumably
 * they return the digest as a string -- confirm before relying on it.
 */
#ifndef _KERNEL
|
||||
char *SHA512_224_End(SHA512_CTX *, char *);
|
||||
char *SHA512_224_Data(const void *, unsigned int, char *);
|
||||
char *SHA512_224_Fd(int, char *);
|
||||
char *SHA512_224_FdChunk(int, char *, off_t, off_t);
|
||||
char *SHA512_224_File(const char *, char *);
|
||||
char *SHA512_224_FileChunk(const char *, char *, off_t, off_t);
|
||||
#endif
|
||||
/*
 * SHA-512/256 incremental interface; same shape as the SHA-512/224 one,
 * with a digest buffer of at least SHA512_256_DIGEST_LENGTH bytes.
 */
void SHA512_256_Init(SHA512_CTX *);
|
||||
void SHA512_256_Update(SHA512_CTX *, const void *, size_t);
|
||||
void SHA512_256_Final(unsigned char [__min_size(SHA512_256_DIGEST_LENGTH)],
|
||||
SHA512_CTX *);
|
||||
/* Convenience helpers, declared for non-kernel builds only. */
#ifndef _KERNEL
|
||||
char *SHA512_256_End(SHA512_CTX *, char *);
|
||||
char *SHA512_256_Data(const void *, unsigned int, char *);
|
||||
char *SHA512_256_Fd(int, char *);
|
||||
char *SHA512_256_FdChunk(int, char *, off_t, off_t);
|
||||
char *SHA512_256_File(const char *, char *);
|
||||
char *SHA512_256_FileChunk(const char *, char *, off_t, off_t);
|
||||
#endif
|
||||
|
||||
__END_DECLS
|
||||
|
||||
#endif /* !_SHA512T_H_ */
|
||||
@@ -0,0 +1,222 @@
|
||||
/*
|
||||
* Copyright (c) 2008, 2009 Edward Tomasz Napierała <trasz@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/zfs_acl.h>
|
||||
#include <sys/acl.h>
|
||||
|
||||
/*
 * One row of a bit translation table: a ZFS-side bit and the FreeBSD-side
 * bit it corresponds to.  Tables are terminated by an all-zero row.
 */
struct zfs2bsd {
	uint32_t	zb_zfs;		/* ZFS (ACE_*) bit */
	int		zb_bsd;		/* matching FreeBSD (ACL_*) bit */
};
|
||||
|
||||
struct zfs2bsd perms[] = {{ACE_READ_DATA, ACL_READ_DATA},
|
||||
{ACE_WRITE_DATA, ACL_WRITE_DATA},
|
||||
{ACE_EXECUTE, ACL_EXECUTE},
|
||||
{ACE_APPEND_DATA, ACL_APPEND_DATA},
|
||||
{ACE_DELETE_CHILD, ACL_DELETE_CHILD},
|
||||
{ACE_DELETE, ACL_DELETE},
|
||||
{ACE_READ_ATTRIBUTES, ACL_READ_ATTRIBUTES},
|
||||
{ACE_WRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES},
|
||||
{ACE_READ_NAMED_ATTRS, ACL_READ_NAMED_ATTRS},
|
||||
{ACE_WRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS},
|
||||
{ACE_READ_ACL, ACL_READ_ACL},
|
||||
{ACE_WRITE_ACL, ACL_WRITE_ACL},
|
||||
{ACE_WRITE_OWNER, ACL_WRITE_OWNER},
|
||||
{ACE_SYNCHRONIZE, ACL_SYNCHRONIZE},
|
||||
{0, 0}};
|
||||
|
||||
struct zfs2bsd flags[] = {{ACE_FILE_INHERIT_ACE,
|
||||
ACL_ENTRY_FILE_INHERIT},
|
||||
{ACE_DIRECTORY_INHERIT_ACE,
|
||||
ACL_ENTRY_DIRECTORY_INHERIT},
|
||||
{ACE_NO_PROPAGATE_INHERIT_ACE,
|
||||
ACL_ENTRY_NO_PROPAGATE_INHERIT},
|
||||
{ACE_INHERIT_ONLY_ACE,
|
||||
ACL_ENTRY_INHERIT_ONLY},
|
||||
{ACE_INHERITED_ACE,
|
||||
ACL_ENTRY_INHERITED},
|
||||
{ACE_SUCCESSFUL_ACCESS_ACE_FLAG,
|
||||
ACL_ENTRY_SUCCESSFUL_ACCESS},
|
||||
{ACE_FAILED_ACCESS_ACE_FLAG,
|
||||
ACL_ENTRY_FAILED_ACCESS},
|
||||
{0, 0}};
|
||||
|
||||
static int
|
||||
_bsd_from_zfs(uint32_t zfs, const struct zfs2bsd *table)
|
||||
{
|
||||
const struct zfs2bsd *tmp;
|
||||
int bsd = 0;
|
||||
|
||||
for (tmp = table; tmp->zb_zfs != 0; tmp++) {
|
||||
if (zfs & tmp->zb_zfs)
|
||||
bsd |= tmp->zb_bsd;
|
||||
}
|
||||
|
||||
return (bsd);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
_zfs_from_bsd(int bsd, const struct zfs2bsd *table)
|
||||
{
|
||||
const struct zfs2bsd *tmp;
|
||||
uint32_t zfs = 0;
|
||||
|
||||
for (tmp = table; tmp->zb_bsd != 0; tmp++) {
|
||||
if (bsd & tmp->zb_bsd)
|
||||
zfs |= tmp->zb_zfs;
|
||||
}
|
||||
|
||||
return (zfs);
|
||||
}
|
||||
|
||||
int
|
||||
acl_from_aces(struct acl *aclp, const ace_t *aces, int nentries)
|
||||
{
|
||||
int i;
|
||||
struct acl_entry *entry;
|
||||
const ace_t *ace;
|
||||
|
||||
if (nentries < 1) {
|
||||
printf("acl_from_aces: empty ZFS ACL; returning EINVAL.\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
if (nentries > ACL_MAX_ENTRIES) {
|
||||
/*
|
||||
* I believe it may happen only when moving a pool
|
||||
* from SunOS to FreeBSD.
|
||||
*/
|
||||
printf("acl_from_aces: ZFS ACL too big to fit "
|
||||
"into 'struct acl'; returning EINVAL.\n");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
bzero(aclp, sizeof (*aclp));
|
||||
aclp->acl_maxcnt = ACL_MAX_ENTRIES;
|
||||
aclp->acl_cnt = nentries;
|
||||
|
||||
for (i = 0; i < nentries; i++) {
|
||||
entry = &(aclp->acl_entry[i]);
|
||||
ace = &(aces[i]);
|
||||
|
||||
if (ace->a_flags & ACE_OWNER)
|
||||
entry->ae_tag = ACL_USER_OBJ;
|
||||
else if (ace->a_flags & ACE_GROUP)
|
||||
entry->ae_tag = ACL_GROUP_OBJ;
|
||||
else if (ace->a_flags & ACE_EVERYONE)
|
||||
entry->ae_tag = ACL_EVERYONE;
|
||||
else if (ace->a_flags & ACE_IDENTIFIER_GROUP)
|
||||
entry->ae_tag = ACL_GROUP;
|
||||
else
|
||||
entry->ae_tag = ACL_USER;
|
||||
|
||||
if (entry->ae_tag == ACL_USER || entry->ae_tag == ACL_GROUP)
|
||||
entry->ae_id = ace->a_who;
|
||||
else
|
||||
entry->ae_id = ACL_UNDEFINED_ID;
|
||||
|
||||
entry->ae_perm = _bsd_from_zfs(ace->a_access_mask, perms);
|
||||
entry->ae_flags = _bsd_from_zfs(ace->a_flags, flags);
|
||||
|
||||
switch (ace->a_type) {
|
||||
case ACE_ACCESS_ALLOWED_ACE_TYPE:
|
||||
entry->ae_entry_type = ACL_ENTRY_TYPE_ALLOW;
|
||||
break;
|
||||
case ACE_ACCESS_DENIED_ACE_TYPE:
|
||||
entry->ae_entry_type = ACL_ENTRY_TYPE_DENY;
|
||||
break;
|
||||
case ACE_SYSTEM_AUDIT_ACE_TYPE:
|
||||
entry->ae_entry_type = ACL_ENTRY_TYPE_AUDIT;
|
||||
break;
|
||||
case ACE_SYSTEM_ALARM_ACE_TYPE:
|
||||
entry->ae_entry_type = ACL_ENTRY_TYPE_ALARM;
|
||||
break;
|
||||
default:
|
||||
panic("acl_from_aces: a_type is 0x%x", ace->a_type);
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
aces_from_acl(ace_t *aces, int *nentries, const struct acl *aclp)
|
||||
{
|
||||
int i;
|
||||
const struct acl_entry *entry;
|
||||
ace_t *ace;
|
||||
|
||||
bzero(aces, sizeof (*aces) * aclp->acl_cnt);
|
||||
|
||||
*nentries = aclp->acl_cnt;
|
||||
|
||||
for (i = 0; i < aclp->acl_cnt; i++) {
|
||||
entry = &(aclp->acl_entry[i]);
|
||||
ace = &(aces[i]);
|
||||
|
||||
ace->a_who = entry->ae_id;
|
||||
|
||||
if (entry->ae_tag == ACL_USER_OBJ)
|
||||
ace->a_flags = ACE_OWNER;
|
||||
else if (entry->ae_tag == ACL_GROUP_OBJ)
|
||||
ace->a_flags = (ACE_GROUP | ACE_IDENTIFIER_GROUP);
|
||||
else if (entry->ae_tag == ACL_GROUP)
|
||||
ace->a_flags = ACE_IDENTIFIER_GROUP;
|
||||
else if (entry->ae_tag == ACL_EVERYONE)
|
||||
ace->a_flags = ACE_EVERYONE;
|
||||
else /* ACL_USER */
|
||||
ace->a_flags = 0;
|
||||
|
||||
ace->a_access_mask = _zfs_from_bsd(entry->ae_perm, perms);
|
||||
ace->a_flags |= _zfs_from_bsd(entry->ae_flags, flags);
|
||||
|
||||
switch (entry->ae_entry_type) {
|
||||
case ACL_ENTRY_TYPE_ALLOW:
|
||||
ace->a_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
|
||||
break;
|
||||
case ACL_ENTRY_TYPE_DENY:
|
||||
ace->a_type = ACE_ACCESS_DENIED_ACE_TYPE;
|
||||
break;
|
||||
case ACL_ENTRY_TYPE_ALARM:
|
||||
ace->a_type = ACE_SYSTEM_ALARM_ACE_TYPE;
|
||||
break;
|
||||
case ACL_ENTRY_TYPE_AUDIT:
|
||||
ace->a_type = ACE_SYSTEM_AUDIT_ACE_TYPE;
|
||||
break;
|
||||
default:
|
||||
panic("aces_from_acl: ae_entry_type is 0x%x",
|
||||
entry->ae_entry_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/atomic.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/kernel.h>
|
||||
|
||||
struct mtx atomic_mtx;
|
||||
MTX_SYSINIT(atomic, &atomic_mtx, "atomic", MTX_DEF);
|
||||
#else
|
||||
#include <pthread.h>
|
||||
|
||||
/* Userland stand-ins for the kernel mutex calls used by the functions below. */
#define	mtx_lock(lock)		pthread_mutex_lock(lock)
#define	mtx_unlock(lock)	pthread_mutex_unlock(lock)

/*
 * Single lock serializing every emulated atomic operation in this file.
 * It is initialized statically rather than from a constructor, so it is
 * already valid if another constructor uses these atomics first.
 */
static pthread_mutex_t atomic_mtx = PTHREAD_MUTEX_INITIALIZER;
|
||||
#endif
|
||||
|
||||
#if !defined(__LP64__) && !defined(__mips_n32) && \
|
||||
!defined(ARM_HAVE_ATOMIC64) && !defined(I386_HAVE_ATOMIC64)
|
||||
void
|
||||
atomic_add_64(volatile uint64_t *target, int64_t delta)
|
||||
{
|
||||
|
||||
mtx_lock(&atomic_mtx);
|
||||
*target += delta;
|
||||
mtx_unlock(&atomic_mtx);
|
||||
}
|
||||
|
||||
void
|
||||
atomic_dec_64(volatile uint64_t *target)
|
||||
{
|
||||
|
||||
mtx_lock(&atomic_mtx);
|
||||
*target -= 1;
|
||||
mtx_unlock(&atomic_mtx);
|
||||
}
|
||||
#endif
|
||||
|
||||
uint64_t
|
||||
atomic_add_64_nv(volatile uint64_t *target, int64_t delta)
|
||||
{
|
||||
uint64_t newval;
|
||||
|
||||
mtx_lock(&atomic_mtx);
|
||||
newval = (*target += delta);
|
||||
mtx_unlock(&atomic_mtx);
|
||||
return (newval);
|
||||
}
|
||||
|
||||
#if defined(__powerpc__) || defined(__arm__) || defined(__mips__)
|
||||
void
|
||||
atomic_or_8(volatile uint8_t *target, uint8_t value)
|
||||
{
|
||||
mtx_lock(&atomic_mtx);
|
||||
*target |= value;
|
||||
mtx_unlock(&atomic_mtx);
|
||||
}
|
||||
#endif
|
||||
|
||||
uint8_t
|
||||
atomic_or_8_nv(volatile uint8_t *target, uint8_t value)
|
||||
{
|
||||
uint8_t newval;
|
||||
|
||||
mtx_lock(&atomic_mtx);
|
||||
newval = (*target |= value);
|
||||
mtx_unlock(&atomic_mtx);
|
||||
return (newval);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
atomic_cas_64(volatile uint64_t *target, uint64_t cmp, uint64_t newval)
|
||||
{
|
||||
uint64_t oldval;
|
||||
|
||||
mtx_lock(&atomic_mtx);
|
||||
oldval = *target;
|
||||
if (oldval == cmp)
|
||||
*target = newval;
|
||||
mtx_unlock(&atomic_mtx);
|
||||
return (oldval);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
atomic_cas_32(volatile uint32_t *target, uint32_t cmp, uint32_t newval)
|
||||
{
|
||||
uint32_t oldval;
|
||||
|
||||
mtx_lock(&atomic_mtx);
|
||||
oldval = *target;
|
||||
if (oldval == cmp)
|
||||
*target = newval;
|
||||
mtx_unlock(&atomic_mtx);
|
||||
return (oldval);
|
||||
}
|
||||
|
||||
/*
 * Producer memory barrier placeholder: the body is empty and issues no
 * fence instruction.
 * NOTE(review): whether callers get the store ordering they expect on
 * weakly ordered CPUs cannot be determined from this file -- confirm.
 */
void
|
||||
membar_producer(void)
|
||||
{
|
||||
/* nothing: intentionally left without any barrier instruction */
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 John Birrell <jb@FreeBSD.org>. All rights reserved.
|
||||
* Copyright 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/cmn_err.h>
|
||||
|
||||
void
|
||||
vcmn_err(int ce, const char *fmt, va_list adx)
|
||||
{
|
||||
char buf[256];
|
||||
const char *prefix;
|
||||
|
||||
prefix = NULL; /* silence unwitty compilers */
|
||||
switch (ce) {
|
||||
case CE_CONT:
|
||||
prefix = "Solaris(cont): ";
|
||||
break;
|
||||
case CE_NOTE:
|
||||
prefix = "Solaris: NOTICE: ";
|
||||
break;
|
||||
case CE_WARN:
|
||||
prefix = "Solaris: WARNING: ";
|
||||
break;
|
||||
case CE_PANIC:
|
||||
prefix = "Solaris(panic): ";
|
||||
break;
|
||||
case CE_IGNORE:
|
||||
break;
|
||||
default:
|
||||
panic("Solaris: unknown severity level");
|
||||
}
|
||||
if (ce == CE_PANIC) {
|
||||
vsnprintf(buf, sizeof (buf), fmt, adx);
|
||||
panic("%s%s", prefix, buf);
|
||||
}
|
||||
if (ce != CE_IGNORE) {
|
||||
printf("%s", prefix);
|
||||
vprintf(fmt, adx);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Variadic front end for vcmn_err(): collect the arguments following fmt
 * and forward them together with the severity.
 */
void
cmn_err(int type, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(type, fmt, args);
	va_end(args);
}
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright 2014 The FreeBSD Project.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software was developed by Steven Hartland.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/sdt.h>
|
||||
|
||||
/* CSTYLED */
|
||||
SDT_PROBE_DEFINE1(sdt, , , set__error, "int");
|
||||
@@ -0,0 +1,351 @@
|
||||
/*
|
||||
* Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/byteorder.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/kmem_cache.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/vmmeter.h>
|
||||
|
||||
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_kern.h>
|
||||
#include <vm/vm_map.h>
|
||||
|
||||
#ifdef KMEM_DEBUG
|
||||
#include <sys/queue.h>
|
||||
#include <sys/stack.h>
|
||||
#endif
|
||||
|
||||
/*
 * In the kernel, allocations made by this file are accounted to the
 * M_SOLARIS malloc type.  Outside the kernel the three- and two-argument
 * malloc()/free() forms used below are mapped onto the plain one-argument
 * versions, dropping the type and flags.
 */
#if defined(_KERNEL)
MALLOC_DEFINE(M_SOLARIS, "solaris", "Solaris");
#else
#define	malloc(size, type, flags)	malloc(size)
#define	free(addr, type)		free(addr)
#endif
|
||||
|
||||
#ifdef KMEM_DEBUG
/*
 * Header placed in front of every allocation when KMEM_DEBUG is enabled.
 * Outstanding allocations are linked on kmem_items, which is protected by
 * kmem_items_mtx.
 */
struct kmem_item {
	struct stack	stack;		/* filled by stack_save() at alloc */
	LIST_ENTRY(kmem_item) next;	/* linkage on kmem_items */
};
static LIST_HEAD(, kmem_item) kmem_items;
static struct mtx kmem_items_mtx;
MTX_SYSINIT(kmem_items_mtx, &kmem_items_mtx, "kmem_items", MTX_DEF);
#endif	/* KMEM_DEBUG */
|
||||
|
||||
#include <sys/vmem.h>
|
||||
|
||||
void *
|
||||
zfs_kmem_alloc(size_t size, int kmflags)
|
||||
{
|
||||
void *p;
|
||||
#ifdef KMEM_DEBUG
|
||||
struct kmem_item *i;
|
||||
|
||||
size += sizeof (struct kmem_item);
|
||||
#endif
|
||||
p = malloc(MAX(size, 16), M_SOLARIS, kmflags);
|
||||
#ifndef _KERNEL
|
||||
if (kmflags & KM_SLEEP)
|
||||
assert(p != NULL);
|
||||
#endif
|
||||
#ifdef KMEM_DEBUG
|
||||
if (p != NULL) {
|
||||
i = p;
|
||||
p = (uint8_t *)p + sizeof (struct kmem_item);
|
||||
stack_save(&i->stack);
|
||||
mtx_lock(&kmem_items_mtx);
|
||||
LIST_INSERT_HEAD(&kmem_items, i, next);
|
||||
mtx_unlock(&kmem_items_mtx);
|
||||
}
|
||||
#endif
|
||||
return (p);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_kmem_free(void *buf, size_t size __unused)
|
||||
{
|
||||
#ifdef KMEM_DEBUG
|
||||
if (buf == NULL) {
|
||||
printf("%s: attempt to free NULL\n", __func__);
|
||||
return;
|
||||
}
|
||||
struct kmem_item *i;
|
||||
|
||||
buf = (uint8_t *)buf - sizeof (struct kmem_item);
|
||||
mtx_lock(&kmem_items_mtx);
|
||||
LIST_FOREACH(i, &kmem_items, next) {
|
||||
if (i == buf)
|
||||
break;
|
||||
}
|
||||
ASSERT(i != NULL);
|
||||
LIST_REMOVE(i, next);
|
||||
mtx_unlock(&kmem_items_mtx);
|
||||
memset(buf, 0xDC, MAX(size, 16));
|
||||
#endif
|
||||
free(buf, M_SOLARIS);
|
||||
}
|
||||
|
||||
static uint64_t kmem_size_val;
|
||||
|
||||
static void
|
||||
kmem_size_init(void *unused __unused)
|
||||
{
|
||||
|
||||
kmem_size_val = (uint64_t)vm_cnt.v_page_count * PAGE_SIZE;
|
||||
if (kmem_size_val > vm_kmem_size)
|
||||
kmem_size_val = vm_kmem_size;
|
||||
}
|
||||
SYSINIT(kmem_size_init, SI_SUB_KMEM, SI_ORDER_ANY, kmem_size_init, NULL);
|
||||
|
||||
uint64_t
|
||||
kmem_size(void)
|
||||
{
|
||||
|
||||
return (kmem_size_val);
|
||||
}
|
||||
|
||||
static int
|
||||
kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
|
||||
{
|
||||
struct kmem_cache *cache = private;
|
||||
|
||||
return (cache->kc_constructor(mem, cache->kc_private, flags));
|
||||
}
|
||||
|
||||
static void
|
||||
kmem_std_destructor(void *mem, int size __unused, void *private)
|
||||
{
|
||||
struct kmem_cache *cache = private;
|
||||
|
||||
cache->kc_destructor(mem, cache->kc_private);
|
||||
}
|
||||
|
||||
kmem_cache_t *
|
||||
kmem_cache_create(char *name, size_t bufsize, size_t align,
|
||||
int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
|
||||
void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags)
|
||||
{
|
||||
kmem_cache_t *cache;
|
||||
|
||||
ASSERT(vmp == NULL);
|
||||
|
||||
cache = kmem_alloc(sizeof (*cache), KM_SLEEP);
|
||||
strlcpy(cache->kc_name, name, sizeof (cache->kc_name));
|
||||
cache->kc_constructor = constructor;
|
||||
cache->kc_destructor = destructor;
|
||||
cache->kc_private = private;
|
||||
#if defined(_KERNEL) && !defined(KMEM_DEBUG)
|
||||
cache->kc_zone = uma_zcreate(cache->kc_name, bufsize,
|
||||
constructor != NULL ? kmem_std_constructor : NULL,
|
||||
destructor != NULL ? kmem_std_destructor : NULL,
|
||||
NULL, NULL, align > 0 ? align - 1 : 0, cflags);
|
||||
#else
|
||||
cache->kc_size = bufsize;
|
||||
#endif
|
||||
|
||||
return (cache);
|
||||
}
|
||||
|
||||
void
|
||||
kmem_cache_destroy(kmem_cache_t *cache)
|
||||
{
|
||||
#if defined(_KERNEL) && !defined(KMEM_DEBUG)
|
||||
uma_zdestroy(cache->kc_zone);
|
||||
#endif
|
||||
kmem_free(cache, sizeof (*cache));
|
||||
}
|
||||
|
||||
void *
|
||||
kmem_cache_alloc(kmem_cache_t *cache, int flags)
|
||||
{
|
||||
#if defined(_KERNEL) && !defined(KMEM_DEBUG)
|
||||
return (uma_zalloc_arg(cache->kc_zone, cache, flags));
|
||||
#else
|
||||
void *p;
|
||||
|
||||
p = kmem_alloc(cache->kc_size, flags);
|
||||
if (p != NULL && cache->kc_constructor != NULL)
|
||||
kmem_std_constructor(p, cache->kc_size, cache, flags);
|
||||
return (p);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
kmem_cache_free(kmem_cache_t *cache, void *buf)
|
||||
{
|
||||
#if defined(_KERNEL) && !defined(KMEM_DEBUG)
|
||||
uma_zfree_arg(cache->kc_zone, buf, cache);
|
||||
#else
|
||||
if (cache->kc_destructor != NULL)
|
||||
kmem_std_destructor(buf, cache->kc_size, cache);
|
||||
kmem_free(buf, cache->kc_size);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow our caller to determine if there are running reaps.
|
||||
*
|
||||
* This call is very conservative and may return B_TRUE even when
|
||||
* reaping activity isn't active. If it returns B_FALSE, then reaping
|
||||
* activity is definitely inactive.
|
||||
*/
|
||||
boolean_t
|
||||
kmem_cache_reap_active(void)
|
||||
{
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reap (almost) everything soon.
|
||||
*
|
||||
* Note: this does not wait for the reap-tasks to complete. Caller
|
||||
* should use kmem_cache_reap_active() (above) and/or moderation to
|
||||
* avoid scheduling too many reap-tasks.
|
||||
*/
|
||||
#ifdef _KERNEL
|
||||
void
|
||||
kmem_cache_reap_soon(kmem_cache_t *cache)
|
||||
{
|
||||
#ifndef KMEM_DEBUG
|
||||
#if __FreeBSD_version >= 1300043
|
||||
uma_zone_reclaim(cache->kc_zone, UMA_RECLAIM_DRAIN);
|
||||
#else
|
||||
zone_drain(cache->kc_zone);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
kmem_reap(void)
|
||||
{
|
||||
#if __FreeBSD_version >= 1300043
|
||||
uma_reclaim(UMA_RECLAIM_TRIM);
|
||||
#else
|
||||
uma_reclaim();
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
void
|
||||
kmem_cache_reap_soon(kmem_cache_t *cache __unused)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
kmem_reap(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Report whether kmem debugging is enabled; this implementation always
 * returns 0.
 */
int
kmem_debugging(void)
{

	return (0);
}
|
||||
|
||||
void *
|
||||
calloc(size_t n, size_t s)
|
||||
{
|
||||
return (kmem_zalloc(n * s, KM_NOSLEEP));
|
||||
}
|
||||
|
||||
char *
|
||||
kmem_vasprintf(const char *fmt, va_list adx)
|
||||
{
|
||||
char *msg;
|
||||
va_list adx2;
|
||||
|
||||
va_copy(adx2, adx);
|
||||
msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP);
|
||||
(void) vsprintf(msg, fmt, adx2);
|
||||
va_end(adx2);
|
||||
|
||||
return (msg);
|
||||
}
|
||||
|
||||
#include <vm/uma.h>
|
||||
#include <vm/uma_int.h>
|
||||
#ifdef KMEM_DEBUG
|
||||
#error "KMEM_DEBUG not currently supported"
|
||||
#endif
|
||||
|
||||
uint64_t
|
||||
spl_kmem_cache_inuse(kmem_cache_t *cache)
|
||||
{
|
||||
return (uma_zone_get_cur(cache->kc_zone));
|
||||
}
|
||||
|
||||
uint64_t
|
||||
spl_kmem_cache_entry_size(kmem_cache_t *cache)
|
||||
{
|
||||
return (cache->kc_zone->uz_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Register a move callback for cache defragmentation.
|
||||
* XXX: Unimplemented but harmless to stub out for now.
|
||||
*/
|
||||
void
|
||||
spl_kmem_cache_set_move(kmem_cache_t *skc,
|
||||
kmem_cbrc_t (move)(void *, void *, size_t, void *))
|
||||
{
|
||||
ASSERT(move != NULL);
|
||||
}
|
||||
|
||||
#ifdef KMEM_DEBUG
void kmem_show(void *);

/*
 * SYSUNINIT hook: while holding kmem_items_mtx, print every allocation
 * still linked on kmem_items (address and saved stack), or a note that
 * nothing leaked.
 */
void
kmem_show(void *dummy __unused)
{
	struct kmem_item *item;

	mtx_lock(&kmem_items_mtx);
	if (!LIST_EMPTY(&kmem_items)) {
		printf("KMEM_DEBUG: Leaked elements:\n\n");
		LIST_FOREACH(item, &kmem_items, next) {
			printf("address=%p\n", item);
			stack_print_ddb(&item->stack);
			printf("\n");
		}
	} else {
		printf("KMEM_DEBUG: No leaked elements.\n");
	}
	mtx_unlock(&kmem_items_mtx);
}

SYSUNINIT(sol_kmem, SI_SUB_CPU, SI_ORDER_FIRST, kmem_show, NULL);
#endif	/* KMEM_DEBUG */
|
||||
@@ -0,0 +1,321 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/kstat.h>
|
||||
|
||||
static MALLOC_DEFINE(M_KSTAT, "kstat_data", "Kernel statistics");
|
||||
|
||||
SYSCTL_ROOT_NODE(OID_AUTO, kstat, CTLFLAG_RW, 0, "Kernel statistics");
|
||||
|
||||
void
|
||||
__kstat_set_raw_ops(kstat_t *ksp,
|
||||
int (*headers)(char *buf, size_t size),
|
||||
int (*data)(char *buf, size_t size, void *data),
|
||||
void *(*addr)(kstat_t *ksp, loff_t index))
|
||||
{
|
||||
ksp->ks_raw_ops.headers = headers;
|
||||
ksp->ks_raw_ops.data = data;
|
||||
ksp->ks_raw_ops.addr = addr;
|
||||
}
|
||||
|
||||
static int
|
||||
kstat_default_update(kstat_t *ksp, int rw)
|
||||
{
|
||||
ASSERT(ksp != NULL);
|
||||
|
||||
if (rw == KSTAT_WRITE)
|
||||
return (EACCES);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
kstat_t *
|
||||
__kstat_create(const char *module, int instance, const char *name,
|
||||
const char *class, uchar_t ks_type, uint_t ks_ndata, uchar_t flags)
|
||||
{
|
||||
struct sysctl_oid *root;
|
||||
kstat_t *ksp;
|
||||
|
||||
KASSERT(instance == 0, ("instance=%d", instance));
|
||||
if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO))
|
||||
ASSERT(ks_ndata == 1);
|
||||
|
||||
/*
|
||||
* Allocate the main structure. We don't need to copy module/class/name
|
||||
* stuff in here, because it is only used for sysctl node creation
|
||||
* done in this function.
|
||||
*/
|
||||
ksp = malloc(sizeof (*ksp), M_KSTAT, M_WAITOK|M_ZERO);
|
||||
|
||||
ksp->ks_crtime = gethrtime();
|
||||
ksp->ks_snaptime = ksp->ks_crtime;
|
||||
ksp->ks_instance = instance;
|
||||
strncpy(ksp->ks_name, name, KSTAT_STRLEN);
|
||||
strncpy(ksp->ks_class, class, KSTAT_STRLEN);
|
||||
ksp->ks_type = ks_type;
|
||||
ksp->ks_flags = flags;
|
||||
ksp->ks_update = kstat_default_update;
|
||||
|
||||
switch (ksp->ks_type) {
|
||||
case KSTAT_TYPE_RAW:
|
||||
ksp->ks_ndata = 1;
|
||||
ksp->ks_data_size = ks_ndata;
|
||||
break;
|
||||
case KSTAT_TYPE_NAMED:
|
||||
ksp->ks_ndata = ks_ndata;
|
||||
ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t);
|
||||
break;
|
||||
case KSTAT_TYPE_INTR:
|
||||
ksp->ks_ndata = ks_ndata;
|
||||
ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t);
|
||||
break;
|
||||
case KSTAT_TYPE_IO:
|
||||
ksp->ks_ndata = ks_ndata;
|
||||
ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t);
|
||||
break;
|
||||
case KSTAT_TYPE_TIMER:
|
||||
ksp->ks_ndata = ks_ndata;
|
||||
ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t);
|
||||
break;
|
||||
default:
|
||||
panic("Undefined kstat type %d\n", ksp->ks_type);
|
||||
}
|
||||
|
||||
if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) {
|
||||
ksp->ks_data = NULL;
|
||||
} else {
|
||||
ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
|
||||
if (ksp->ks_data == NULL) {
|
||||
kmem_free(ksp, sizeof (*ksp));
|
||||
ksp = NULL;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Create sysctl tree for those statistics:
|
||||
*
|
||||
* kstat.<module>.<class>.<name>.
|
||||
*/
|
||||
sysctl_ctx_init(&ksp->ks_sysctl_ctx);
|
||||
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
|
||||
SYSCTL_STATIC_CHILDREN(_kstat), OID_AUTO, module, CTLFLAG_RW, 0,
|
||||
"");
|
||||
if (root == NULL) {
|
||||
printf("%s: Cannot create kstat.%s tree!\n", __func__, module);
|
||||
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
||||
free(ksp, M_KSTAT);
|
||||
return (NULL);
|
||||
}
|
||||
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
|
||||
OID_AUTO, class, CTLFLAG_RW, 0, "");
|
||||
if (root == NULL) {
|
||||
printf("%s: Cannot create kstat.%s.%s tree!\n", __func__,
|
||||
module, class);
|
||||
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
||||
free(ksp, M_KSTAT);
|
||||
return (NULL);
|
||||
}
|
||||
root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
|
||||
OID_AUTO, name, CTLFLAG_RW, 0, "");
|
||||
if (root == NULL) {
|
||||
printf("%s: Cannot create kstat.%s.%s.%s tree!\n", __func__,
|
||||
module, class, name);
|
||||
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
||||
free(ksp, M_KSTAT);
|
||||
return (NULL);
|
||||
}
|
||||
ksp->ks_sysctl_root = root;
|
||||
|
||||
return (ksp);
|
||||
}
|
||||
|
||||
static int
|
||||
kstat_sysctl(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
kstat_named_t *ksent = arg1;
|
||||
uint64_t val;
|
||||
|
||||
val = ksent->value.ui64;
|
||||
return (sysctl_handle_64(oidp, &val, 0, req));
|
||||
}
|
||||
|
||||
void
|
||||
kstat_install(kstat_t *ksp)
|
||||
{
|
||||
kstat_named_t *ksent;
|
||||
char *namelast;
|
||||
int typelast;
|
||||
|
||||
ksent = ksp->ks_data;
|
||||
if (ksp->ks_ndata == UINT32_MAX) {
|
||||
#ifdef INVARIANTS
|
||||
printf("can't handle raw ops yet!!!\n");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
if (ksent == NULL) {
|
||||
printf("%s ksp->ks_data == NULL!!!!\n", __func__);
|
||||
return;
|
||||
}
|
||||
typelast = 0;
|
||||
namelast = NULL;
|
||||
for (int i = 0; i < ksp->ks_ndata; i++, ksent++) {
|
||||
if (ksent->data_type != 0) {
|
||||
typelast = ksent->data_type;
|
||||
namelast = ksent->name;
|
||||
}
|
||||
switch (typelast) {
|
||||
case KSTAT_DATA_INT32:
|
||||
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
||||
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
||||
OID_AUTO, namelast,
|
||||
CTLTYPE_S32 | CTLFLAG_RD, ksent,
|
||||
sizeof (*ksent), kstat_sysctl, "I",
|
||||
namelast);
|
||||
break;
|
||||
case KSTAT_DATA_UINT32:
|
||||
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
||||
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
||||
OID_AUTO, namelast,
|
||||
CTLTYPE_U32 | CTLFLAG_RD, ksent,
|
||||
sizeof (*ksent), kstat_sysctl, "IU",
|
||||
namelast);
|
||||
break;
|
||||
case KSTAT_DATA_INT64:
|
||||
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
||||
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
||||
OID_AUTO, namelast,
|
||||
CTLTYPE_S64 | CTLFLAG_RD, ksent,
|
||||
sizeof (*ksent), kstat_sysctl, "Q",
|
||||
namelast);
|
||||
break;
|
||||
case KSTAT_DATA_UINT64:
|
||||
SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
|
||||
SYSCTL_CHILDREN(ksp->ks_sysctl_root),
|
||||
OID_AUTO, namelast,
|
||||
CTLTYPE_U64 | CTLFLAG_RD, ksent,
|
||||
sizeof (*ksent), kstat_sysctl, "QU",
|
||||
namelast);
|
||||
break;
|
||||
default:
|
||||
panic("unsupported type: %d", typelast);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
kstat_delete(kstat_t *ksp)
|
||||
{
|
||||
|
||||
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
|
||||
free(ksp, M_KSTAT);
|
||||
}
|
||||
|
||||
void
|
||||
kstat_set_string(char *dst, const char *src)
|
||||
{
|
||||
|
||||
bzero(dst, KSTAT_STRLEN);
|
||||
(void) strncpy(dst, src, KSTAT_STRLEN - 1);
|
||||
}
|
||||
|
||||
void
|
||||
kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
|
||||
{
|
||||
|
||||
kstat_set_string(knp->name, name);
|
||||
knp->data_type = data_type;
|
||||
}
|
||||
|
||||
void
|
||||
kstat_waitq_enter(kstat_io_t *kiop)
|
||||
{
|
||||
hrtime_t new, delta;
|
||||
ulong_t wcnt;
|
||||
|
||||
new = gethrtime();
|
||||
delta = new - kiop->wlastupdate;
|
||||
kiop->wlastupdate = new;
|
||||
wcnt = kiop->wcnt++;
|
||||
if (wcnt != 0) {
|
||||
kiop->wlentime += delta * wcnt;
|
||||
kiop->wtime += delta;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
kstat_waitq_exit(kstat_io_t *kiop)
|
||||
{
|
||||
hrtime_t new, delta;
|
||||
ulong_t wcnt;
|
||||
|
||||
new = gethrtime();
|
||||
delta = new - kiop->wlastupdate;
|
||||
kiop->wlastupdate = new;
|
||||
wcnt = kiop->wcnt--;
|
||||
ASSERT((int)wcnt > 0);
|
||||
kiop->wlentime += delta * wcnt;
|
||||
kiop->wtime += delta;
|
||||
}
|
||||
|
||||
void
|
||||
kstat_runq_enter(kstat_io_t *kiop)
|
||||
{
|
||||
hrtime_t new, delta;
|
||||
ulong_t rcnt;
|
||||
|
||||
new = gethrtime();
|
||||
delta = new - kiop->rlastupdate;
|
||||
kiop->rlastupdate = new;
|
||||
rcnt = kiop->rcnt++;
|
||||
if (rcnt != 0) {
|
||||
kiop->rlentime += delta * rcnt;
|
||||
kiop->rtime += delta;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
kstat_runq_exit(kstat_io_t *kiop)
|
||||
{
|
||||
hrtime_t new, delta;
|
||||
ulong_t rcnt;
|
||||
|
||||
new = gethrtime();
|
||||
delta = new - kiop->rlastupdate;
|
||||
kiop->rlastupdate = new;
|
||||
rcnt = kiop->rcnt--;
|
||||
ASSERT((int)rcnt > 0);
|
||||
kiop->rlentime += delta * rcnt;
|
||||
kiop->rtime += delta;
|
||||
}
|
||||
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/jail.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/libkern.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/misc.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
char hw_serial[11] = "0";
|
||||
|
||||
static struct opensolaris_utsname hw_utsname = {
|
||||
.machine = MACHINE
|
||||
};
|
||||
|
||||
static void
|
||||
opensolaris_utsname_init(void *arg)
|
||||
{
|
||||
|
||||
hw_utsname.sysname = ostype;
|
||||
hw_utsname.nodename = prison0.pr_hostname;
|
||||
hw_utsname.release = osrelease;
|
||||
snprintf(hw_utsname.version, sizeof (hw_utsname.version),
|
||||
"%d", osreldate);
|
||||
}
|
||||
|
||||
char *
|
||||
kmem_strdup(const char *s)
|
||||
{
|
||||
char *buf;
|
||||
|
||||
buf = kmem_alloc(strlen(s) + 1, KM_SLEEP);
|
||||
strcpy(buf, s);
|
||||
return (buf);
|
||||
}
|
||||
|
||||
int
|
||||
ddi_copyin(const void *from, void *to, size_t len, int flags)
|
||||
{
|
||||
/* Fake ioctl() issued by kernel, 'from' is a kernel address */
|
||||
if (flags & FKIOCTL) {
|
||||
memcpy(to, from, len);
|
||||
return (0);
|
||||
}
|
||||
|
||||
return (copyin(from, to, len));
|
||||
}
|
||||
|
||||
int
|
||||
ddi_copyout(const void *from, void *to, size_t len, int flags)
|
||||
{
|
||||
/* Fake ioctl() issued by kernel, 'from' is a kernel address */
|
||||
if (flags & FKIOCTL) {
|
||||
memcpy(to, from, len);
|
||||
return (0);
|
||||
}
|
||||
|
||||
return (copyout(from, to, len));
|
||||
}
|
||||
|
||||
/*
 * Panic with the printf-style message fmt.  The file, func and line
 * arguments are accepted but are not included in the message.  Control is
 * handed to vpanic(); no value is returned by this function itself.
 */
int
spl_panic(const char *file, const char *func, int line, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
|
||||
|
||||
utsname_t *
|
||||
utsname(void)
|
||||
{
|
||||
return (&hw_utsname);
|
||||
}
|
||||
SYSINIT(opensolaris_utsname_init, SI_SUB_TUNABLES, SI_ORDER_ANY,
|
||||
opensolaris_utsname_init, NULL);
|
||||
@@ -0,0 +1,429 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/priv.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/mntent.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/jail.h>
|
||||
#include <sys/policy.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
|
||||
|
||||
int
|
||||
secpolicy_nfs(cred_t *cr)
|
||||
{
|
||||
|
||||
return (spl_priv_check_cred(cr, PRIV_NFS_DAEMON));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_zfs(cred_t *cr)
|
||||
{
|
||||
|
||||
return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_sys_config(cred_t *cr, int checkonly __unused)
|
||||
{
|
||||
|
||||
return (spl_priv_check_cred(cr, PRIV_ZFS_POOL_CONFIG));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_zinject(cred_t *cr)
|
||||
{
|
||||
|
||||
return (spl_priv_check_cred(cr, PRIV_ZFS_INJECT));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_fs_unmount(cred_t *cr, struct mount *vfsp __unused)
|
||||
{
|
||||
|
||||
return (spl_priv_check_cred(cr, PRIV_VFS_UNMOUNT));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_fs_owner(struct mount *mp, cred_t *cr)
|
||||
{
|
||||
|
||||
if (zfs_super_owner) {
|
||||
if (cr->cr_uid == mp->mnt_cred->cr_uid &&
|
||||
cr->cr_prison == mp->mnt_cred->cr_prison) {
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
/*
|
||||
* This check is done in kern_link(), so we could just return 0 here.
|
||||
*/
|
||||
extern int hardlink_check_uid;
|
||||
int
|
||||
secpolicy_basic_link(vnode_t *vp, cred_t *cr)
|
||||
{
|
||||
|
||||
if (!hardlink_check_uid)
|
||||
return (0);
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return (0);
|
||||
return (spl_priv_check_cred(cr, PRIV_VFS_LINK));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_vnode_stky_modify(cred_t *cr)
|
||||
{
|
||||
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_vnode_remove(vnode_t *vp, cred_t *cr)
|
||||
{
|
||||
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return (0);
|
||||
return (spl_priv_check_cred(cr, PRIV_VFS_ADMIN));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_vnode_access(cred_t *cr, vnode_t *vp, uid_t owner, accmode_t accmode)
|
||||
{
|
||||
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return (0);
|
||||
|
||||
if ((accmode & VREAD) && spl_priv_check_cred(cr, PRIV_VFS_READ) != 0)
|
||||
return (EACCES);
|
||||
if ((accmode & VWRITE) &&
|
||||
spl_priv_check_cred(cr, PRIV_VFS_WRITE) != 0) {
|
||||
return (EACCES);
|
||||
}
|
||||
if (accmode & VEXEC) {
|
||||
if (vp->v_type == VDIR) {
|
||||
if (spl_priv_check_cred(cr, PRIV_VFS_LOOKUP) != 0)
|
||||
return (EACCES);
|
||||
} else {
|
||||
if (spl_priv_check_cred(cr, PRIV_VFS_EXEC) != 0)
|
||||
return (EACCES);
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Like secpolicy_vnode_access() but we get the actual wanted mode and the
|
||||
* current mode of the file, not the missing bits.
|
||||
*/
|
||||
int
|
||||
secpolicy_vnode_access2(cred_t *cr, vnode_t *vp, uid_t owner,
|
||||
accmode_t curmode, accmode_t wantmode)
|
||||
{
|
||||
accmode_t mode;
|
||||
|
||||
mode = ~curmode & wantmode;
|
||||
|
||||
if (mode == 0)
|
||||
return (0);
|
||||
|
||||
return (secpolicy_vnode_access(cr, vp, owner, mode));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_vnode_any_access(cred_t *cr, vnode_t *vp, uid_t owner)
|
||||
{
|
||||
static int privs[] = {
|
||||
PRIV_VFS_ADMIN,
|
||||
PRIV_VFS_READ,
|
||||
PRIV_VFS_WRITE,
|
||||
PRIV_VFS_EXEC,
|
||||
PRIV_VFS_LOOKUP
|
||||
};
|
||||
int i;
|
||||
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return (0);
|
||||
|
||||
/* Same as secpolicy_vnode_setdac */
|
||||
if (owner == cr->cr_uid)
|
||||
return (0);
|
||||
|
||||
for (i = 0; i < sizeof (privs)/sizeof (int); i++) {
|
||||
int priv;
|
||||
|
||||
switch (priv = privs[i]) {
|
||||
case PRIV_VFS_EXEC:
|
||||
if (vp->v_type == VDIR)
|
||||
continue;
|
||||
break;
|
||||
case PRIV_VFS_LOOKUP:
|
||||
if (vp->v_type != VDIR)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
if (spl_priv_check_cred(cr, priv) == 0)
|
||||
return (0);
|
||||
}
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_vnode_setdac(vnode_t *vp, cred_t *cr, uid_t owner)
|
||||
{
|
||||
|
||||
if (owner == cr->cr_uid)
|
||||
return (0);
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return (0);
|
||||
return (spl_priv_check_cred(cr, PRIV_VFS_ADMIN));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_vnode_setattr(cred_t *cr, vnode_t *vp, struct vattr *vap,
|
||||
const struct vattr *ovap, int flags,
|
||||
int unlocked_access(void *, int, cred_t *), void *node)
|
||||
{
|
||||
int mask = vap->va_mask;
|
||||
int error;
|
||||
|
||||
if (mask & AT_SIZE) {
|
||||
if (vp->v_type == VDIR)
|
||||
return (EISDIR);
|
||||
error = unlocked_access(node, VWRITE, cr);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
if (mask & AT_MODE) {
|
||||
/*
|
||||
* If not the owner of the file then check privilege
|
||||
* for two things: the privilege to set the mode at all
|
||||
* and, if we're setting setuid, we also need permissions
|
||||
* to add the set-uid bit, if we're not the owner.
|
||||
* In the specific case of creating a set-uid root
|
||||
* file, we need even more permissions.
|
||||
*/
|
||||
error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid);
|
||||
if (error)
|
||||
return (error);
|
||||
error = secpolicy_setid_setsticky_clear(vp, vap, ovap, cr);
|
||||
if (error)
|
||||
return (error);
|
||||
} else {
|
||||
vap->va_mode = ovap->va_mode;
|
||||
}
|
||||
if (mask & (AT_UID | AT_GID)) {
|
||||
error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* To change the owner of a file, or change the group of
|
||||
* a file to a group of which we are not a member, the
|
||||
* caller must have privilege.
|
||||
*/
|
||||
if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) ||
|
||||
((mask & AT_GID) && vap->va_gid != ovap->va_gid &&
|
||||
!groupmember(vap->va_gid, cr))) {
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) != 0) {
|
||||
error = spl_priv_check_cred(cr, PRIV_VFS_CHOWN);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
|
||||
if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) ||
|
||||
((mask & AT_GID) && vap->va_gid != ovap->va_gid)) {
|
||||
secpolicy_setid_clear(vap, vp, cr);
|
||||
}
|
||||
}
|
||||
if (mask & (AT_ATIME | AT_MTIME)) {
|
||||
/*
|
||||
* From utimes(2):
|
||||
* If times is NULL, ... The caller must be the owner of
|
||||
* the file, have permission to write the file, or be the
|
||||
* super-user.
|
||||
* If times is non-NULL, ... The caller must be the owner of
|
||||
* the file or be the super-user.
|
||||
*/
|
||||
error = secpolicy_vnode_setdac(vp, cr, ovap->va_uid);
|
||||
if (error && (vap->va_vaflags & VA_UTIMES_NULL))
|
||||
error = unlocked_access(node, VWRITE, cr);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_vnode_create_gid(cred_t *cr)
|
||||
{
|
||||
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_vnode_setids_setgids(vnode_t *vp, cred_t *cr, gid_t gid)
|
||||
{
|
||||
|
||||
if (groupmember(gid, cr))
|
||||
return (0);
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return (0);
|
||||
return (spl_priv_check_cred(cr, PRIV_VFS_SETGID));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_vnode_setid_retain(vnode_t *vp, cred_t *cr,
|
||||
boolean_t issuidroot __unused)
|
||||
{
|
||||
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return (0);
|
||||
return (spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID));
|
||||
}
|
||||
|
||||
void
|
||||
secpolicy_setid_clear(struct vattr *vap, vnode_t *vp, cred_t *cr)
|
||||
{
|
||||
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return;
|
||||
|
||||
if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0) {
|
||||
if (spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID)) {
|
||||
vap->va_mask |= AT_MODE;
|
||||
vap->va_mode &= ~(S_ISUID|S_ISGID);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_setid_setsticky_clear(vnode_t *vp, struct vattr *vap,
|
||||
const struct vattr *ovap, cred_t *cr)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* Privileged processes may set the sticky bit on non-directories,
|
||||
* as well as set the setgid bit on a file with a group that the process
|
||||
* is not a member of. Both of these are allowed in jail(8).
|
||||
*/
|
||||
if (vp->v_type != VDIR && (vap->va_mode & S_ISTXT)) {
|
||||
if (spl_priv_check_cred(cr, PRIV_VFS_STICKYFILE))
|
||||
return (EFTYPE);
|
||||
}
|
||||
/*
|
||||
* Check for privilege if attempting to set the
|
||||
* group-id bit.
|
||||
*/
|
||||
if ((vap->va_mode & S_ISGID) != 0) {
|
||||
error = secpolicy_vnode_setids_setgids(vp, cr, ovap->va_gid);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
/*
|
||||
* Deny setting setuid if we are not the file owner.
|
||||
*/
|
||||
if ((vap->va_mode & S_ISUID) && ovap->va_uid != cr->cr_uid) {
|
||||
error = spl_priv_check_cred(cr, PRIV_VFS_ADMIN);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_fs_mount(cred_t *cr, vnode_t *mvp, struct mount *vfsp)
|
||||
{
|
||||
|
||||
return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_vnode_owner(vnode_t *vp, cred_t *cr, uid_t owner)
|
||||
{
|
||||
|
||||
if (owner == cr->cr_uid)
|
||||
return (0);
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return (0);
|
||||
|
||||
/* XXX: vfs_suser()? */
|
||||
return (spl_priv_check_cred(cr, PRIV_VFS_MOUNT_OWNER));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_vnode_chown(vnode_t *vp, cred_t *cr, uid_t owner)
|
||||
{
|
||||
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return (0);
|
||||
return (spl_priv_check_cred(cr, PRIV_VFS_CHOWN));
|
||||
}
|
||||
|
||||
void
|
||||
secpolicy_fs_mount_clearopts(cred_t *cr, struct mount *vfsp)
|
||||
{
|
||||
|
||||
if (spl_priv_check_cred(cr, PRIV_VFS_MOUNT_NONUSER) != 0) {
|
||||
MNT_ILOCK(vfsp);
|
||||
vfsp->vfs_flag |= VFS_NOSETUID | MNT_USER;
|
||||
vfs_clearmntopt(vfsp, MNTOPT_SETUID);
|
||||
vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 0);
|
||||
MNT_IUNLOCK(vfsp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check privileges for setting xvattr attributes
|
||||
*/
|
||||
int
|
||||
secpolicy_xvattr(vnode_t *vp, xvattr_t *xvap, uid_t owner, cred_t *cr,
|
||||
vtype_t vtype)
|
||||
{
|
||||
|
||||
if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
|
||||
return (0);
|
||||
return (spl_priv_check_cred(cr, PRIV_VFS_SYSFLAGS));
|
||||
}
|
||||
|
||||
int
|
||||
secpolicy_smb(cred_t *cr)
|
||||
{
|
||||
|
||||
return (spl_priv_check_cred(cr, PRIV_NETSMB));
|
||||
}
|
||||
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright (c) 2020 iXsystems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/list.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/procfs_list.h>
|
||||
|
||||
/*
 * Stub: takes printf-style arguments and produces no output.
 */
void
seq_printf(struct seq_file *m, const char *fmt, ...)
{
	/* Intentionally empty. */
}
|
||||
|
||||
void
|
||||
procfs_list_install(const char *module,
|
||||
const char *name,
|
||||
mode_t mode,
|
||||
procfs_list_t *procfs_list,
|
||||
int (*show)(struct seq_file *f, void *p),
|
||||
int (*show_header)(struct seq_file *f),
|
||||
int (*clear)(procfs_list_t *procfs_list),
|
||||
size_t procfs_list_node_off)
|
||||
{
|
||||
mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
list_create(&procfs_list->pl_list,
|
||||
procfs_list_node_off + sizeof (procfs_list_node_t),
|
||||
procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
|
||||
procfs_list->pl_next_id = 1;
|
||||
procfs_list->pl_node_offset = procfs_list_node_off;
|
||||
}
|
||||
|
||||
void
|
||||
procfs_list_uninstall(procfs_list_t *procfs_list)
|
||||
{}
|
||||
|
||||
void
|
||||
procfs_list_destroy(procfs_list_t *procfs_list)
|
||||
{
|
||||
ASSERT(list_is_empty(&procfs_list->pl_list));
|
||||
list_destroy(&procfs_list->pl_list);
|
||||
mutex_destroy(&procfs_list->pl_lock);
|
||||
}
|
||||
|
||||
/*
 * Lvalue for the pln_id of the procfs_list_node_t embedded in obj at byte
 * offset (procfs_list)->pl_node_offset.  Both arguments are parenthesized
 * so that an expression such as "p + 1" is cast as a whole.
 */
#define	NODE_ID(procfs_list, obj) \
	(((procfs_list_node_t *)(((char *)(obj)) + \
	    (procfs_list)->pl_node_offset))->pln_id)
|
||||
|
||||
void
|
||||
procfs_list_add(procfs_list_t *procfs_list, void *p)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
|
||||
NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
|
||||
list_insert_tail(&procfs_list->pl_list, p);
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/string.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <machine/stdarg.h>
|
||||
|
||||
/* True when c is one of the characters '0' through '9'. */
#define	IS_DIGIT(c)	('0' <= (c) && (c) <= '9')

/* True when c is one of the characters 'a'-'z' or 'A'-'Z'. */
#define	IS_ALPHA(c)	\
	(('A' <= (c) && (c) <= 'Z') || ('a' <= (c) && (c) <= 'z'))
|
||||
|
||||
/*
 * Return a pointer to the first character of s that also occurs in b, or
 * NULL when s contains none of the characters in b.
 */
char *
strpbrk(const char *s, const char *b)
{
	const char *cand;

	for (;;) {
		/* Compare the current character of s against every one in b. */
		for (cand = b; *cand != '\0'; cand++) {
			if (*cand == *s)
				return ((char *)s);
		}
		if (*s == '\0')
			break;
		s++;
	}

	return (NULL);
}
|
||||
|
||||
/*
 * Convert a string into a valid C identifier by replacing invalid
 * characters with '_'.  Also makes sure the string is nul-terminated
 * and takes up at most n bytes.
 *
 * The first character may be a letter or '_'; later characters may also be
 * digits.  An empty string is left unchanged, and a zero-length buffer
 * (n == 0) is not touched at all.
 */
void
strident_canon(char *s, size_t n)
{
	char c;
	char *end;

	/* With n == 0 there is no byte that may be read or written. */
	if (n == 0)
		return;

	/* Last byte of the buffer: reserved for the terminator. */
	end = s + n - 1;

	if ((c = *s) == 0)
		return;

	if (!IS_ALPHA(c) && c != '_')
		*s = '_';

	while (s < end && ((c = *(++s)) != 0)) {
		if (!IS_ALPHA(c) && !IS_DIGIT(c) && c != '_')
			*s = '_';
	}
	/* Either s reached end (truncate) or *s is already 0. */
	*s = 0;
}
|
||||
|
||||
/*
|
||||
* Do not change the length of the returned string; it must be freed
|
||||
* with strfree().
|
||||
*/
|
||||
char *
|
||||
kmem_asprintf(const char *fmt, ...)
|
||||
{
|
||||
int size;
|
||||
va_list adx;
|
||||
char *buf;
|
||||
|
||||
va_start(adx, fmt);
|
||||
size = vsnprintf(NULL, 0, fmt, adx) + 1;
|
||||
va_end(adx);
|
||||
|
||||
buf = kmem_alloc(size, KM_SLEEP);
|
||||
|
||||
va_start(adx, fmt);
|
||||
(void) vsnprintf(buf, size, fmt, adx);
|
||||
va_end(adx);
|
||||
|
||||
return (buf);
|
||||
}
|
||||
|
||||
/*
 * Free a string whose allocation size is strlen(str) + 1.  str must not
 * be NULL (asserted).
 */
void
kmem_strfree(char *str)
{
	size_t nbytes;

	ASSERT(str != NULL);
	nbytes = strlen(str) + 1;
	kmem_free(str, nbytes);
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/jail.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/libkern.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/misc.h>
|
||||
#include <sys/sunddi.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
/*
 * Thin wrapper around strtol(): the parsed value is stored in *result and
 * the function always returns 0.
 */
int
ddi_strtol(const char *str, char **nptr, int base, long *result)
{
	long parsed;

	parsed = strtol(str, nptr, base);
	*result = parsed;
	return (0);
}
|
||||
|
||||
int
|
||||
ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result)
|
||||
{
|
||||
|
||||
if (str == hw_serial) {
|
||||
*result = prison0.pr_hostid;
|
||||
return (0);
|
||||
}
|
||||
|
||||
*result = strtoul(str, nptr, base);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Thin wrapper around strtouq(): the parsed value is stored in *result and
 * the function always returns 0.
 */
int
ddi_strtoull(const char *str, char **nptr, int base, unsigned long long *result)
{
	unsigned long long parsed;

	parsed = (unsigned long long)strtouq(str, nptr, base);
	*result = parsed;
	return (0);
}
|
||||
|
||||
/*
 * Thin wrapper around strtoq(): the parsed value is stored in *result and
 * the function always returns 0.
 */
int
ddi_strtoll(const char *str, char **nptr, int base, long long *result)
{
	long long parsed;

	parsed = (long long)strtoq(str, nptr, base);
	*result = parsed;
	return (0);
}
|
||||
@@ -0,0 +1,259 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
* Copyright (c) 2020 iXsystems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/sbuf.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/sunddi.h>
|
||||
#include <sys/sysevent.h>
|
||||
#include <sys/fm/protocol.h>
|
||||
#include <sys/fm/util.h>
|
||||
#include <sys/bus.h>
|
||||
|
||||
static int
|
||||
log_sysevent(nvlist_t *event)
|
||||
{
|
||||
struct sbuf *sb;
|
||||
const char *type;
|
||||
char typestr[128];
|
||||
nvpair_t *elem = NULL;
|
||||
|
||||
sb = sbuf_new_auto();
|
||||
if (sb == NULL)
|
||||
return (ENOMEM);
|
||||
type = NULL;
|
||||
|
||||
while ((elem = nvlist_next_nvpair(event, elem)) != NULL) {
|
||||
switch (nvpair_type(elem)) {
|
||||
case DATA_TYPE_BOOLEAN:
|
||||
{
|
||||
boolean_t value;
|
||||
|
||||
(void) nvpair_value_boolean_value(elem, &value);
|
||||
sbuf_printf(sb, " %s=%s", nvpair_name(elem),
|
||||
value ? "true" : "false");
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_UINT8:
|
||||
{
|
||||
uint8_t value;
|
||||
|
||||
(void) nvpair_value_uint8(elem, &value);
|
||||
sbuf_printf(sb, " %s=%hhu", nvpair_name(elem), value);
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_INT32:
|
||||
{
|
||||
int32_t value;
|
||||
|
||||
(void) nvpair_value_int32(elem, &value);
|
||||
sbuf_printf(sb, " %s=%jd", nvpair_name(elem),
|
||||
(intmax_t)value);
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_UINT32:
|
||||
{
|
||||
uint32_t value;
|
||||
|
||||
(void) nvpair_value_uint32(elem, &value);
|
||||
sbuf_printf(sb, " %s=%ju", nvpair_name(elem),
|
||||
(uintmax_t)value);
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_INT64:
|
||||
{
|
||||
int64_t value;
|
||||
|
||||
(void) nvpair_value_int64(elem, &value);
|
||||
sbuf_printf(sb, " %s=%jd", nvpair_name(elem),
|
||||
(intmax_t)value);
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_UINT64:
|
||||
{
|
||||
uint64_t value;
|
||||
|
||||
(void) nvpair_value_uint64(elem, &value);
|
||||
sbuf_printf(sb, " %s=%ju", nvpair_name(elem),
|
||||
(uintmax_t)value);
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_STRING:
|
||||
{
|
||||
char *value;
|
||||
|
||||
(void) nvpair_value_string(elem, &value);
|
||||
sbuf_printf(sb, " %s=%s", nvpair_name(elem), value);
|
||||
if (strcmp(FM_CLASS, nvpair_name(elem)) == 0)
|
||||
type = value;
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_UINT8_ARRAY:
|
||||
{
|
||||
uint8_t *value;
|
||||
uint_t ii, nelem;
|
||||
|
||||
(void) nvpair_value_uint8_array(elem, &value, &nelem);
|
||||
sbuf_printf(sb, " %s=", nvpair_name(elem));
|
||||
for (ii = 0; ii < nelem; ii++)
|
||||
sbuf_printf(sb, "%02hhx", value[ii]);
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_UINT16_ARRAY:
|
||||
{
|
||||
uint16_t *value;
|
||||
uint_t ii, nelem;
|
||||
|
||||
(void) nvpair_value_uint16_array(elem, &value, &nelem);
|
||||
sbuf_printf(sb, " %s=", nvpair_name(elem));
|
||||
for (ii = 0; ii < nelem; ii++)
|
||||
sbuf_printf(sb, "%04hx", value[ii]);
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_UINT32_ARRAY:
|
||||
{
|
||||
uint32_t *value;
|
||||
uint_t ii, nelem;
|
||||
|
||||
(void) nvpair_value_uint32_array(elem, &value, &nelem);
|
||||
sbuf_printf(sb, " %s=", nvpair_name(elem));
|
||||
for (ii = 0; ii < nelem; ii++)
|
||||
sbuf_printf(sb, "%08jx", (uintmax_t)value[ii]);
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_INT64_ARRAY:
|
||||
{
|
||||
int64_t *value;
|
||||
uint_t ii, nelem;
|
||||
|
||||
(void) nvpair_value_int64_array(elem, &value, &nelem);
|
||||
sbuf_printf(sb, " %s=", nvpair_name(elem));
|
||||
for (ii = 0; ii < nelem; ii++)
|
||||
sbuf_printf(sb, "%016lld",
|
||||
(long long)value[ii]);
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_UINT64_ARRAY:
|
||||
{
|
||||
uint64_t *value;
|
||||
uint_t ii, nelem;
|
||||
|
||||
(void) nvpair_value_uint64_array(elem, &value, &nelem);
|
||||
sbuf_printf(sb, " %s=", nvpair_name(elem));
|
||||
for (ii = 0; ii < nelem; ii++)
|
||||
sbuf_printf(sb, "%016jx", (uintmax_t)value[ii]);
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_STRING_ARRAY:
|
||||
{
|
||||
char **strarr;
|
||||
uint_t ii, nelem;
|
||||
|
||||
(void) nvpair_value_string_array(elem, &strarr, &nelem);
|
||||
|
||||
for (ii = 0; ii < nelem; ii++) {
|
||||
if (strarr[ii] == NULL) {
|
||||
sbuf_printf(sb, " <NULL>");
|
||||
continue;
|
||||
}
|
||||
|
||||
sbuf_printf(sb, " %s", strarr[ii]);
|
||||
if (strcmp(FM_CLASS, strarr[ii]) == 0)
|
||||
type = strarr[ii];
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DATA_TYPE_NVLIST:
|
||||
/* XXX - requires recursing in log_sysevent */
|
||||
break;
|
||||
default:
|
||||
printf("%s: type %d is not implemented\n", __func__,
|
||||
nvpair_type(elem));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (sbuf_finish(sb) != 0) {
|
||||
sbuf_delete(sb);
|
||||
return (ENOMEM);
|
||||
}
|
||||
|
||||
if (type == NULL)
|
||||
type = "";
|
||||
if (strncmp(type, "ESC_ZFS_", 8) == 0) {
|
||||
snprintf(typestr, sizeof (typestr), "misc.fs.zfs.%s", type + 8);
|
||||
type = typestr;
|
||||
}
|
||||
devctl_notify("ZFS", "ZFS", type, sbuf_data(sb));
|
||||
sbuf_delete(sb);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
sysevent_worker(void *arg __unused)
|
||||
{
|
||||
zfs_zevent_t *ze;
|
||||
nvlist_t *event;
|
||||
uint64_t dropped = 0;
|
||||
uint64_t dst_size;
|
||||
int error;
|
||||
|
||||
zfs_zevent_init(&ze);
|
||||
for (;;) {
|
||||
dst_size = 131072;
|
||||
dropped = 0;
|
||||
event = NULL;
|
||||
error = zfs_zevent_next(ze, &event,
|
||||
&dst_size, &dropped);
|
||||
if (error) {
|
||||
error = zfs_zevent_wait(ze);
|
||||
if (error == ESHUTDOWN)
|
||||
break;
|
||||
} else {
|
||||
VERIFY(event != NULL);
|
||||
log_sysevent(event);
|
||||
nvlist_free(event);
|
||||
}
|
||||
}
|
||||
zfs_zevent_destroy(ze);
|
||||
kthread_exit();
|
||||
}
|
||||
|
||||
void
|
||||
ddi_sysevent_init(void)
|
||||
{
|
||||
kproc_kthread_add(sysevent_worker, NULL, &zfsproc, NULL, 0, 0,
|
||||
"zfskern", "sysevent");
|
||||
}
|
||||
@@ -0,0 +1,329 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Spectra Logic Corporation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/taskqueue.h>
|
||||
#include <sys/taskq.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#include <vm/uma.h>
|
||||
|
||||
static uint_t taskq_tsd;
|
||||
static uma_zone_t taskq_zone;
|
||||
|
||||
taskq_t *system_taskq = NULL;
|
||||
taskq_t *system_delay_taskq = NULL;
|
||||
taskq_t *dynamic_taskq = NULL;
|
||||
|
||||
extern int uma_align_cache;

/*
 * A taskqid_t is the entry's address with the entry's generation count
 * OR-ed into the bits selected by uma_align_cache.  TQ_MASK extracts the
 * generation, TQ_PTR_MASK the pointer.
 */
#define	TQ_MASK		(uma_align_cache)
#define	TQ_PTR_MASK	(~uma_align_cache)

/* Values stored in tqent_type. */
#define	TIMEOUT_TASK	1
#define	NORMAL_TASK	2
|
||||
|
||||
static int
|
||||
taskqent_init(void *mem, int size, int flags)
|
||||
{
|
||||
bzero(mem, sizeof (taskq_ent_t));
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Zone constructor callback (passed to uma_zcreate()): nothing to do.
 * Always returns 0.
 */
static int
taskqent_ctor(void *mem, int size, void *arg, int flags)
{

	return (0);
}
|
||||
|
||||
static void
|
||||
taskqent_dtor(void *mem, int size, void *arg)
|
||||
{
|
||||
taskq_ent_t *ent = mem;
|
||||
|
||||
ent->tqent_gen = (ent->tqent_gen + 1) & TQ_MASK;
|
||||
}
|
||||
|
||||
static void
|
||||
system_taskq_init(void *arg)
|
||||
{
|
||||
|
||||
tsd_create(&taskq_tsd, NULL);
|
||||
taskq_zone = uma_zcreate("taskq_zone", sizeof (taskq_ent_t),
|
||||
taskqent_ctor, taskqent_dtor, taskqent_init, NULL,
|
||||
UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
|
||||
system_taskq = taskq_create("system_taskq", mp_ncpus, minclsyspri,
|
||||
0, 0, 0);
|
||||
system_delay_taskq = taskq_create("system_delay_taskq", mp_ncpus,
|
||||
minclsyspri, 0, 0, 0);
|
||||
}
|
||||
SYSINIT(system_taskq_init, SI_SUB_CONFIGURE, SI_ORDER_ANY, system_taskq_init,
|
||||
NULL);
|
||||
|
||||
static void
|
||||
system_taskq_fini(void *arg)
|
||||
{
|
||||
|
||||
taskq_destroy(system_taskq);
|
||||
uma_zdestroy(taskq_zone);
|
||||
tsd_destroy(&taskq_tsd);
|
||||
}
|
||||
SYSUNINIT(system_taskq_fini, SI_SUB_CONFIGURE, SI_ORDER_ANY, system_taskq_fini,
|
||||
NULL);
|
||||
|
||||
static void
|
||||
taskq_tsd_set(void *context)
|
||||
{
|
||||
taskq_t *tq = context;
|
||||
|
||||
tsd_set(taskq_tsd, tq);
|
||||
}
|
||||
|
||||
static taskq_t *
|
||||
taskq_create_with_init(const char *name, int nthreads, pri_t pri,
|
||||
int minalloc __unused, int maxalloc __unused, uint_t flags)
|
||||
{
|
||||
taskq_t *tq;
|
||||
|
||||
if ((flags & TASKQ_THREADS_CPU_PCT) != 0)
|
||||
nthreads = MAX((mp_ncpus * nthreads) / 100, 1);
|
||||
|
||||
tq = kmem_alloc(sizeof (*tq), KM_SLEEP);
|
||||
tq->tq_queue = taskqueue_create(name, M_WAITOK,
|
||||
taskqueue_thread_enqueue, &tq->tq_queue);
|
||||
taskqueue_set_callback(tq->tq_queue, TASKQUEUE_CALLBACK_TYPE_INIT,
|
||||
taskq_tsd_set, tq);
|
||||
taskqueue_set_callback(tq->tq_queue, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN,
|
||||
taskq_tsd_set, NULL);
|
||||
(void) taskqueue_start_threads(&tq->tq_queue, nthreads, pri,
|
||||
"%s", name);
|
||||
|
||||
return ((taskq_t *)tq);
|
||||
}
|
||||
|
||||
taskq_t *
|
||||
taskq_create(const char *name, int nthreads, pri_t pri, int minalloc __unused,
|
||||
int maxalloc __unused, uint_t flags)
|
||||
{
|
||||
|
||||
return (taskq_create_with_init(name, nthreads, pri, minalloc, maxalloc,
|
||||
flags));
|
||||
}
|
||||
|
||||
taskq_t *
|
||||
taskq_create_proc(const char *name, int nthreads, pri_t pri, int minalloc,
|
||||
int maxalloc, proc_t *proc __unused, uint_t flags)
|
||||
{
|
||||
|
||||
return (taskq_create_with_init(name, nthreads, pri, minalloc, maxalloc,
|
||||
flags));
|
||||
}
|
||||
|
||||
void
|
||||
taskq_destroy(taskq_t *tq)
|
||||
{
|
||||
|
||||
taskqueue_free(tq->tq_queue);
|
||||
kmem_free(tq, sizeof (*tq));
|
||||
}
|
||||
|
||||
int
|
||||
taskq_member(taskq_t *tq, kthread_t *thread)
|
||||
{
|
||||
|
||||
return (taskqueue_member(tq->tq_queue, thread));
|
||||
}
|
||||
|
||||
taskq_t *
|
||||
taskq_of_curthread(void)
|
||||
{
|
||||
return (tsd_get(taskq_tsd));
|
||||
}
|
||||
|
||||
int
|
||||
taskq_cancel_id(taskq_t *tq, taskqid_t tid)
|
||||
{
|
||||
uint32_t pend;
|
||||
int rc;
|
||||
taskq_ent_t *ent = (void*)(tid & TQ_PTR_MASK);
|
||||
|
||||
if (ent == NULL)
|
||||
return (0);
|
||||
if ((tid & TQ_MASK) != ent->tqent_gen)
|
||||
return (0);
|
||||
if (ent->tqent_type == TIMEOUT_TASK) {
|
||||
rc = taskqueue_cancel_timeout(tq->tq_queue,
|
||||
&ent->tqent_timeout_task, &pend);
|
||||
} else
|
||||
rc = taskqueue_cancel(tq->tq_queue, &ent->tqent_task, &pend);
|
||||
if (rc == EBUSY)
|
||||
taskq_wait_id(tq, tid);
|
||||
else
|
||||
uma_zfree(taskq_zone, ent);
|
||||
return (rc);
|
||||
}
|
||||
|
||||
static void
|
||||
taskq_run(void *arg, int pending __unused)
|
||||
{
|
||||
taskq_ent_t *task = arg;
|
||||
|
||||
task->tqent_func(task->tqent_arg);
|
||||
uma_zfree(taskq_zone, task);
|
||||
}
|
||||
|
||||
taskqid_t
|
||||
taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
|
||||
uint_t flags, clock_t expire_time)
|
||||
{
|
||||
taskq_ent_t *task;
|
||||
taskqid_t tid;
|
||||
clock_t timo;
|
||||
int mflag;
|
||||
|
||||
timo = expire_time - ddi_get_lbolt();
|
||||
if (timo <= 0)
|
||||
return (taskq_dispatch(tq, func, arg, flags));
|
||||
|
||||
if ((flags & (TQ_SLEEP | TQ_NOQUEUE)) == TQ_SLEEP)
|
||||
mflag = M_WAITOK;
|
||||
else
|
||||
mflag = M_NOWAIT;
|
||||
|
||||
task = uma_zalloc(taskq_zone, mflag);
|
||||
if (task == NULL)
|
||||
return (0);
|
||||
tid = (uintptr_t)task;
|
||||
MPASS((tid & TQ_MASK) == 0);
|
||||
task->tqent_func = func;
|
||||
task->tqent_arg = arg;
|
||||
task->tqent_type = TIMEOUT_TASK;
|
||||
tid |= task->tqent_gen;
|
||||
TIMEOUT_TASK_INIT(tq->tq_queue, &task->tqent_timeout_task, 0,
|
||||
taskq_run, task);
|
||||
|
||||
taskqueue_enqueue_timeout(tq->tq_queue, &task->tqent_timeout_task,
|
||||
timo);
|
||||
return (tid);
|
||||
}
|
||||
|
||||
taskqid_t
|
||||
taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
|
||||
{
|
||||
taskq_ent_t *task;
|
||||
int mflag, prio;
|
||||
taskqid_t tid;
|
||||
|
||||
if ((flags & (TQ_SLEEP | TQ_NOQUEUE)) == TQ_SLEEP)
|
||||
mflag = M_WAITOK;
|
||||
else
|
||||
mflag = M_NOWAIT;
|
||||
/*
|
||||
* If TQ_FRONT is given, we want higher priority for this task, so it
|
||||
* can go at the front of the queue.
|
||||
*/
|
||||
prio = !!(flags & TQ_FRONT);
|
||||
|
||||
task = uma_zalloc(taskq_zone, mflag);
|
||||
if (task == NULL)
|
||||
return (0);
|
||||
|
||||
tid = (uintptr_t)task;
|
||||
MPASS((tid & TQ_MASK) == 0);
|
||||
task->tqent_func = func;
|
||||
task->tqent_arg = arg;
|
||||
task->tqent_type = NORMAL_TASK;
|
||||
TASK_INIT(&task->tqent_task, prio, taskq_run, task);
|
||||
tid |= task->tqent_gen;
|
||||
taskqueue_enqueue(tq->tq_queue, &task->tqent_task);
|
||||
return (tid);
|
||||
}
|
||||
|
||||
static void
|
||||
taskq_run_ent(void *arg, int pending __unused)
|
||||
{
|
||||
taskq_ent_t *task = arg;
|
||||
|
||||
task->tqent_func(task->tqent_arg);
|
||||
}
|
||||
|
||||
void
|
||||
taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint32_t flags,
|
||||
taskq_ent_t *task)
|
||||
{
|
||||
int prio;
|
||||
|
||||
/*
|
||||
* If TQ_FRONT is given, we want higher priority for this task, so it
|
||||
* can go at the front of the queue.
|
||||
*/
|
||||
prio = !!(flags & TQ_FRONT);
|
||||
|
||||
task->tqent_func = func;
|
||||
task->tqent_arg = arg;
|
||||
|
||||
TASK_INIT(&task->tqent_task, prio, taskq_run_ent, task);
|
||||
taskqueue_enqueue(tq->tq_queue, &task->tqent_task);
|
||||
}
|
||||
|
||||
void
|
||||
taskq_wait(taskq_t *tq)
|
||||
{
|
||||
taskqueue_quiesce(tq->tq_queue);
|
||||
}
|
||||
|
||||
void
|
||||
taskq_wait_id(taskq_t *tq, taskqid_t tid)
|
||||
{
|
||||
taskq_ent_t *ent = (void*)(tid & TQ_PTR_MASK);
|
||||
|
||||
if ((tid & TQ_MASK) != ent->tqent_gen)
|
||||
return;
|
||||
|
||||
taskqueue_drain(tq->tq_queue, &ent->tqent_task);
|
||||
}
|
||||
|
||||
void
|
||||
taskq_wait_outstanding(taskq_t *tq, taskqid_t id __unused)
|
||||
{
|
||||
taskqueue_drain_all(tq->tq_queue);
|
||||
}
|
||||
|
||||
int
|
||||
taskq_empty_ent(taskq_ent_t *t)
|
||||
{
|
||||
return (t->tqent_task.ta_pending == 0);
|
||||
}
|
||||
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
|
||||
/* All Rights Reserved */
|
||||
|
||||
/*
|
||||
* University Copyright- Copyright (c) 1982, 1986, 1988
|
||||
* The Regents of the University of California
|
||||
* All Rights Reserved
|
||||
*
|
||||
* University Acknowledgment- Portions of this document are derived from
|
||||
* software developed by the University of California, Berkeley, and its
|
||||
* contributors.
|
||||
*/
|
||||
|
||||
/*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/vnode.h>
|
||||
|
||||
/*
|
||||
* same as uiomove() but doesn't modify uio structure.
|
||||
* return in cbytes how many bytes were copied.
|
||||
*/
|
||||
int
|
||||
uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
|
||||
{
|
||||
struct iovec small_iovec[1];
|
||||
struct uio small_uio_clone;
|
||||
struct uio *uio_clone;
|
||||
int error;
|
||||
|
||||
ASSERT3U(uio->uio_rw, ==, rw);
|
||||
if (uio->uio_iovcnt == 1) {
|
||||
small_uio_clone = *uio;
|
||||
small_iovec[0] = *uio->uio_iov;
|
||||
small_uio_clone.uio_iov = small_iovec;
|
||||
uio_clone = &small_uio_clone;
|
||||
} else {
|
||||
uio_clone = cloneuio(uio);
|
||||
}
|
||||
|
||||
error = vn_io_fault_uiomove(p, n, uio_clone);
|
||||
*cbytes = uio->uio_resid - uio_clone->uio_resid;
|
||||
if (uio_clone != &small_uio_clone)
|
||||
free(uio_clone, M_IOV);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop the next n bytes out of *uio.
|
||||
*/
|
||||
void
|
||||
uioskip(uio_t *uio, size_t n)
|
||||
{
|
||||
enum uio_seg segflg;
|
||||
|
||||
/* For the full compatibility with illumos. */
|
||||
if (n > uio->uio_resid)
|
||||
return;
|
||||
|
||||
segflg = uio->uio_segflg;
|
||||
uio->uio_segflg = UIO_NOCOPY;
|
||||
uiomove(NULL, n, uio->uio_rw, uio);
|
||||
uio->uio_segflg = segflg;
|
||||
}
|
||||
@@ -0,0 +1,278 @@
|
||||
/*
|
||||
* Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/cred.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/priv.h>
|
||||
#include <sys/libkern.h>
|
||||
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/vnode.h>
|
||||
|
||||
MALLOC_DECLARE(M_MOUNT);
|
||||
|
||||
void
|
||||
vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
|
||||
int flags __unused)
|
||||
{
|
||||
struct vfsopt *opt;
|
||||
size_t namesize;
|
||||
int locked;
|
||||
|
||||
if (!(locked = mtx_owned(MNT_MTX(vfsp))))
|
||||
MNT_ILOCK(vfsp);
|
||||
|
||||
if (vfsp->mnt_opt == NULL) {
|
||||
void *opts;
|
||||
|
||||
MNT_IUNLOCK(vfsp);
|
||||
opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
|
||||
MNT_ILOCK(vfsp);
|
||||
if (vfsp->mnt_opt == NULL) {
|
||||
vfsp->mnt_opt = opts;
|
||||
TAILQ_INIT(vfsp->mnt_opt);
|
||||
} else {
|
||||
free(opts, M_MOUNT);
|
||||
}
|
||||
}
|
||||
|
||||
MNT_IUNLOCK(vfsp);
|
||||
|
||||
opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK);
|
||||
namesize = strlen(name) + 1;
|
||||
opt->name = malloc(namesize, M_MOUNT, M_WAITOK);
|
||||
strlcpy(opt->name, name, namesize);
|
||||
opt->pos = -1;
|
||||
opt->seen = 1;
|
||||
if (arg == NULL) {
|
||||
opt->value = NULL;
|
||||
opt->len = 0;
|
||||
} else {
|
||||
opt->len = strlen(arg) + 1;
|
||||
opt->value = malloc(opt->len, M_MOUNT, M_WAITOK);
|
||||
bcopy(arg, opt->value, opt->len);
|
||||
}
|
||||
|
||||
MNT_ILOCK(vfsp);
|
||||
TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link);
|
||||
if (!locked)
|
||||
MNT_IUNLOCK(vfsp);
|
||||
}
|
||||
|
||||
void
|
||||
vfs_clearmntopt(vfs_t *vfsp, const char *name)
|
||||
{
|
||||
int locked;
|
||||
|
||||
if (!(locked = mtx_owned(MNT_MTX(vfsp))))
|
||||
MNT_ILOCK(vfsp);
|
||||
vfs_deleteopt(vfsp->mnt_opt, name);
|
||||
if (!locked)
|
||||
MNT_IUNLOCK(vfsp);
|
||||
}
|
||||
|
||||
int
|
||||
vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
|
||||
{
|
||||
struct vfsoptlist *opts = vfsp->mnt_optnew;
|
||||
int error;
|
||||
|
||||
if (opts == NULL)
|
||||
return (0);
|
||||
error = vfs_getopt(opts, opt, (void **)argp, NULL);
|
||||
return (error != 0 ? 0 : 1);
|
||||
}
|
||||
|
||||
int
|
||||
mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
|
||||
char *fspec, int fsflags)
|
||||
{
|
||||
struct vfsconf *vfsp;
|
||||
struct mount *mp;
|
||||
vnode_t *vp, *mvp;
|
||||
struct ucred *cr;
|
||||
int error;
|
||||
|
||||
ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");
|
||||
|
||||
vp = *vpp;
|
||||
*vpp = NULL;
|
||||
error = 0;
|
||||
|
||||
/*
|
||||
* Be ultra-paranoid about making sure the type and fspath
|
||||
* variables will fit in our mp buffers, including the
|
||||
* terminating NUL.
|
||||
*/
|
||||
if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
|
||||
error = ENAMETOOLONG;
|
||||
if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
|
||||
error = ENODEV;
|
||||
if (error == 0 && vp->v_type != VDIR)
|
||||
error = ENOTDIR;
|
||||
/*
|
||||
* We need vnode lock to protect v_mountedhere and vnode interlock
|
||||
* to protect v_iflag.
|
||||
*/
|
||||
if (error == 0) {
|
||||
VI_LOCK(vp);
|
||||
if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
|
||||
vp->v_iflag |= VI_MOUNT;
|
||||
else
|
||||
error = EBUSY;
|
||||
VI_UNLOCK(vp);
|
||||
}
|
||||
if (error != 0) {
|
||||
vput(vp);
|
||||
return (error);
|
||||
}
|
||||
VOP_UNLOCK1(vp);
|
||||
|
||||
/*
|
||||
* Allocate and initialize the filesystem.
|
||||
* We don't want regular user that triggered snapshot mount to be able
|
||||
* to unmount it, so pass credentials of the parent mount.
|
||||
*/
|
||||
mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred);
|
||||
|
||||
mp->mnt_optnew = NULL;
|
||||
vfs_setmntopt(mp, "from", fspec, 0);
|
||||
mp->mnt_optnew = mp->mnt_opt;
|
||||
mp->mnt_opt = NULL;
|
||||
|
||||
/*
|
||||
* Set the mount level flags.
|
||||
*/
|
||||
mp->mnt_flag = fsflags & MNT_UPDATEMASK;
|
||||
/*
|
||||
* Snapshots are always read-only.
|
||||
*/
|
||||
mp->mnt_flag |= MNT_RDONLY;
|
||||
/*
|
||||
* We don't want snapshots to allow access to vulnerable setuid
|
||||
* programs, so we turn off setuid when mounting snapshots.
|
||||
*/
|
||||
mp->mnt_flag |= MNT_NOSUID;
|
||||
/*
|
||||
* We don't want snapshots to be visible in regular
|
||||
* mount(8) and df(1) output.
|
||||
*/
|
||||
mp->mnt_flag |= MNT_IGNORE;
|
||||
/*
|
||||
* XXX: This is evil, but we can't mount a snapshot as a regular user.
|
||||
* XXX: Is is safe when snapshot is mounted from within a jail?
|
||||
*/
|
||||
cr = td->td_ucred;
|
||||
td->td_ucred = kcred;
|
||||
error = VFS_MOUNT(mp);
|
||||
td->td_ucred = cr;
|
||||
|
||||
if (error != 0) {
|
||||
/*
|
||||
* Clear VI_MOUNT and decrement the use count "atomically",
|
||||
* under the vnode lock. This is not strictly required,
|
||||
* but makes it easier to reason about the life-cycle and
|
||||
* ownership of the covered vnode.
|
||||
*/
|
||||
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
||||
VI_LOCK(vp);
|
||||
vp->v_iflag &= ~VI_MOUNT;
|
||||
VI_UNLOCK(vp);
|
||||
vput(vp);
|
||||
vfs_unbusy(mp);
|
||||
vfs_freeopts(mp->mnt_optnew);
|
||||
mp->mnt_vnodecovered = NULL;
|
||||
vfs_mount_destroy(mp);
|
||||
return (error);
|
||||
}
|
||||
|
||||
if (mp->mnt_opt != NULL)
|
||||
vfs_freeopts(mp->mnt_opt);
|
||||
mp->mnt_opt = mp->mnt_optnew;
|
||||
(void) VFS_STATFS(mp, &mp->mnt_stat);
|
||||
|
||||
/*
|
||||
* Prevent external consumers of mount options from reading
|
||||
* mnt_optnew.
|
||||
*/
|
||||
mp->mnt_optnew = NULL;
|
||||
|
||||
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
||||
#ifdef FREEBSD_NAMECACHE
|
||||
cache_purge(vp);
|
||||
#endif
|
||||
VI_LOCK(vp);
|
||||
vp->v_iflag &= ~VI_MOUNT;
|
||||
VI_UNLOCK(vp);
|
||||
|
||||
vp->v_mountedhere = mp;
|
||||
/* Put the new filesystem on the mount list. */
|
||||
mtx_lock(&mountlist_mtx);
|
||||
TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
|
||||
mtx_unlock(&mountlist_mtx);
|
||||
vfs_event_signal(NULL, VQ_MOUNT, 0);
|
||||
if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
|
||||
panic("mount: lost mount");
|
||||
VOP_UNLOCK1(vp);
|
||||
#if __FreeBSD_version >= 1300048
|
||||
vfs_op_exit(mp);
|
||||
#endif
|
||||
vfs_unbusy(mp);
|
||||
*vpp = mvp;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
|
||||
* asynchronously using a taskq. This can avoid deadlocks caused by re-entering
|
||||
* the file system as a result of releasing the vnode. Note, file systems
|
||||
* already have to handle the race where the vnode is incremented before the
|
||||
* inactive routine is called and does its locking.
|
||||
*
|
||||
* Warning: Excessive use of this routine can lead to performance problems.
|
||||
* This is because taskqs throttle back allocation if too many are created.
|
||||
*/
|
||||
void
|
||||
vn_rele_async(vnode_t *vp, taskq_t *taskq)
|
||||
{
|
||||
VERIFY(vp->v_count > 0);
|
||||
if (refcount_release_if_not_last(&vp->v_usecount)) {
|
||||
#if __FreeBSD_version < 1300045
|
||||
vdrop(vp);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
VERIFY(taskq_dispatch((taskq_t *)taskq,
|
||||
(task_func_t *)vrele, vp, TQ_SLEEP) != 0);
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2013 EMC Corp.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/byteorder.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/freebsd_rwlock.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_param.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_pager.h>
|
||||
|
||||
const int zfs_vm_pagerret_bad = VM_PAGER_BAD;
|
||||
const int zfs_vm_pagerret_error = VM_PAGER_ERROR;
|
||||
const int zfs_vm_pagerret_ok = VM_PAGER_OK;
|
||||
const int zfs_vm_pagerput_sync = VM_PAGER_PUT_SYNC;
|
||||
const int zfs_vm_pagerput_inval = VM_PAGER_PUT_INVAL;
|
||||
|
||||
void
|
||||
zfs_vmobject_assert_wlocked(vm_object_t object)
|
||||
{
|
||||
|
||||
/*
|
||||
* This is not ideal because FILE/LINE used by assertions will not
|
||||
* be too helpful, but it must be an hard function for
|
||||
* compatibility reasons.
|
||||
*/
|
||||
VM_OBJECT_ASSERT_WLOCKED(object);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_vmobject_wlock(vm_object_t object)
|
||||
{
|
||||
|
||||
VM_OBJECT_WLOCK(object);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_vmobject_wunlock(vm_object_t object)
|
||||
{
|
||||
|
||||
VM_OBJECT_WUNLOCK(object);
|
||||
}
|
||||
@@ -0,0 +1,268 @@
|
||||
/*
|
||||
* Copyright (c) 2020 iXsystems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/kmem_cache.h>
|
||||
#include <sys/zmod.h>
|
||||
#if __FreeBSD_version >= 1300041
|
||||
#include <contrib/zlib/zlib.h>
|
||||
#else
|
||||
#include <sys/zlib.h>
|
||||
#endif
|
||||
#include <sys/kobj.h>
|
||||
|
||||
|
||||
/*ARGSUSED*/
|
||||
static void *
|
||||
zcalloc(void *opaque, uint_t items, uint_t size)
|
||||
{
|
||||
|
||||
return (malloc((size_t)items*size, M_SOLARIS, M_NOWAIT));
|
||||
}
|
||||
|
||||
/*ARGSUSED*/
|
||||
static void
|
||||
zcfree(void *opaque, void *ptr)
|
||||
{
|
||||
|
||||
free(ptr, M_SOLARIS);
|
||||
}
|
||||
|
||||
static int
|
||||
zlib_deflateInit(z_stream *stream, int level)
|
||||
{
|
||||
|
||||
stream->zalloc = zcalloc;
|
||||
stream->opaque = NULL;
|
||||
stream->zfree = zcfree;
|
||||
|
||||
return (deflateInit(stream, level));
|
||||
}
|
||||
|
||||
static int
|
||||
zlib_deflate(z_stream *stream, int flush)
|
||||
{
|
||||
return (deflate(stream, flush));
|
||||
}
|
||||
|
||||
static int
|
||||
zlib_deflateEnd(z_stream *stream)
|
||||
{
|
||||
return (deflateEnd(stream));
|
||||
}
|
||||
|
||||
static int
|
||||
zlib_inflateInit(z_stream *stream)
|
||||
{
|
||||
stream->zalloc = zcalloc;
|
||||
stream->opaque = NULL;
|
||||
stream->zfree = zcfree;
|
||||
|
||||
return (inflateInit(stream));
|
||||
}
|
||||
|
||||
static int
|
||||
zlib_inflate(z_stream *stream, int finish)
|
||||
{
|
||||
#if __FreeBSD_version >= 1300024
|
||||
return (inflate(stream, finish));
|
||||
#else
|
||||
return (_zlib104_inflate(stream, finish));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
zlib_inflateEnd(z_stream *stream)
|
||||
{
|
||||
return (inflateInit(stream));
|
||||
}
|
||||
|
||||
/*
|
||||
* A kmem_cache is used for the zlib workspaces to avoid having to vmalloc
|
||||
* and vfree for every call. Using a kmem_cache also has the advantage
|
||||
* that it improves the odds that the memory used will be local to this cpu.
|
||||
* To further improve things it might be wise to create a dedicated per-cpu
|
||||
* workspace for use. This would take some additional care because we then
|
||||
* must disable preemption around the critical section, and verify that
|
||||
* zlib_deflate* and zlib_inflate* never internally call schedule().
|
||||
*/
|
||||
static void *
|
||||
zlib_workspace_alloc(int flags)
|
||||
{
|
||||
// return (kmem_cache_alloc(zlib_workspace_cache, flags));
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static void
zlib_workspace_free(void *workspace)
{
	/*
	 * Intentionally empty: zlib_workspace_alloc() never hands out a
	 * workspace, so there is nothing to give back.
	 */
}
|
||||
|
||||
/*
|
||||
* Compresses the source buffer into the destination buffer. The level
|
||||
* parameter has the same meaning as in deflateInit. sourceLen is the byte
|
||||
* length of the source buffer. Upon entry, destLen is the total size of the
|
||||
* destination buffer, which must be at least 0.1% larger than sourceLen plus
|
||||
* 12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
|
||||
*
|
||||
* z_compress_level returns Z_OK on success, Z_MEM_ERROR if there was not enough
|
||||
* memory, Z_BUF_ERROR if there was not enough room in the output buffer,
|
||||
* Z_STREAM_ERROR if the level parameter is invalid.
|
||||
*/
|
||||
int
|
||||
z_compress_level(void *dest, size_t *destLen, const void *source,
|
||||
size_t sourceLen, int level)
|
||||
{
|
||||
z_stream stream;
|
||||
int err;
|
||||
|
||||
bzero(&stream, sizeof (stream));
|
||||
stream.next_in = (Byte *)source;
|
||||
stream.avail_in = (uInt)sourceLen;
|
||||
stream.next_out = dest;
|
||||
stream.avail_out = (uInt)*destLen;
|
||||
stream.opaque = NULL;
|
||||
|
||||
if ((size_t)stream.avail_out != *destLen)
|
||||
return (Z_BUF_ERROR);
|
||||
|
||||
stream.opaque = zlib_workspace_alloc(KM_SLEEP);
|
||||
#if 0
|
||||
if (!stream.opaque)
|
||||
return (Z_MEM_ERROR);
|
||||
#endif
|
||||
err = zlib_deflateInit(&stream, level);
|
||||
if (err != Z_OK) {
|
||||
zlib_workspace_free(stream.opaque);
|
||||
return (err);
|
||||
}
|
||||
|
||||
err = zlib_deflate(&stream, Z_FINISH);
|
||||
if (err != Z_STREAM_END) {
|
||||
zlib_deflateEnd(&stream);
|
||||
zlib_workspace_free(stream.opaque);
|
||||
return (err == Z_OK ? Z_BUF_ERROR : err);
|
||||
}
|
||||
*destLen = stream.total_out;
|
||||
|
||||
err = zlib_deflateEnd(&stream);
|
||||
zlib_workspace_free(stream.opaque);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Decompresses the source buffer into the destination buffer. sourceLen is
|
||||
* the byte length of the source buffer. Upon entry, destLen is the total
|
||||
* size of the destination buffer, which must be large enough to hold the
|
||||
* entire uncompressed data. (The size of the uncompressed data must have
|
||||
* been saved previously by the compressor and transmitted to the decompressor
|
||||
* by some mechanism outside the scope of this compression library.)
|
||||
* Upon exit, destLen is the actual size of the uncompressed buffer.
|
||||
* This function can be used to decompress a whole file at once if the
|
||||
* input file is mmap'ed.
|
||||
*
|
||||
* uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
|
||||
* enough memory, Z_BUF_ERROR if there was not enough room in the output
|
||||
* buffer, or Z_DATA_ERROR if the input data was corrupted.
|
||||
*/
|
||||
int
|
||||
z_uncompress(void *dest, size_t *destLen, const void *source, size_t sourceLen)
|
||||
{
|
||||
z_stream stream;
|
||||
int err;
|
||||
|
||||
bzero(&stream, sizeof (stream));
|
||||
|
||||
stream.next_in = (Byte *)source;
|
||||
stream.avail_in = (uInt)sourceLen;
|
||||
stream.next_out = dest;
|
||||
stream.avail_out = (uInt)*destLen;
|
||||
|
||||
if ((size_t)stream.avail_out != *destLen)
|
||||
return (Z_BUF_ERROR);
|
||||
|
||||
stream.opaque = zlib_workspace_alloc(KM_SLEEP);
|
||||
#if 0
|
||||
if (!stream.opaque)
|
||||
return (Z_MEM_ERROR);
|
||||
#endif
|
||||
err = zlib_inflateInit(&stream);
|
||||
if (err != Z_OK) {
|
||||
zlib_workspace_free(stream.opaque);
|
||||
return (err);
|
||||
}
|
||||
|
||||
err = zlib_inflate(&stream, Z_FINISH);
|
||||
if (err != Z_STREAM_END) {
|
||||
zlib_inflateEnd(&stream);
|
||||
zlib_workspace_free(stream.opaque);
|
||||
|
||||
if (err == Z_NEED_DICT ||
|
||||
(err == Z_BUF_ERROR && stream.avail_in == 0))
|
||||
return (Z_DATA_ERROR);
|
||||
|
||||
return (err);
|
||||
}
|
||||
*destLen = stream.total_out;
|
||||
|
||||
err = zlib_inflateEnd(&stream);
|
||||
zlib_workspace_free(stream.opaque);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
#if 0
/*
 * Compiled out: set-up and tear-down of a kmem_cache of zlib
 * workspaces.  Nothing in this file declares zlib_workspace_cache, so
 * this cannot be enabled as is.
 */

/* Create the workspace cache; returns 0 on success and 1 on failure. */
int
spl_zlib_init(void)
{
	int deflate_sz, inflate_sz;

	/* One object must be able to hold either kind of workspace. */
	deflate_sz = spl_zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL);
	inflate_sz = zlib_inflate_workspacesize();

	zlib_workspace_cache = kmem_cache_create(
	    "spl_zlib_workspace_cache",
	    MAX(deflate_sz, inflate_sz), 0, NULL, NULL, NULL, NULL, NULL,
	    KMC_VMEM | KMC_NOEMERGENCY);

	return (zlib_workspace_cache ? 0 : 1);
}

/* Destroy the workspace cache and forget the pointer. */
void
spl_zlib_fini(void)
{
	kmem_cache_destroy(zlib_workspace_cache);
	zlib_workspace_cache = NULL;
}
#endif
|
||||
@@ -0,0 +1,265 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/sx.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/jail.h>
|
||||
#include <sys/osd.h>
|
||||
#include <sys/priv.h>
|
||||
#include <sys/zone.h>
|
||||
|
||||
#include <sys/policy.h>
|
||||
|
||||
/* malloc(9) type that every allocation in this file is charged to. */
static MALLOC_DEFINE(M_ZONES, "zones_data", "Zones data");
|
||||
|
||||
/*
|
||||
* Structure to record list of ZFS datasets exported to a zone.
|
||||
*/
|
||||
/*
 * One dataset name exported to a jail.  The NUL-terminated name is
 * stored inline after the list linkage, so an entry is allocated as
 * sizeof (zone_dataset_t) + strlen(name) + 1 bytes.
 */
typedef struct zone_dataset {
	LIST_ENTRY(zone_dataset) zd_next;	/* linkage in the jail's list */
	char	zd_dataset[];			/* C99 flexible array member */
} zone_dataset_t;

/* Head of a jail's list of exported datasets. */
LIST_HEAD(zone_dataset_head, zone_dataset);

/* Jail OSD slot the list head is stored under; set in zone_sysinit(). */
static int zone_slot;
|
||||
|
||||
int
|
||||
zone_dataset_attach(struct ucred *cred, const char *dataset, int jailid)
|
||||
{
|
||||
struct zone_dataset_head *head;
|
||||
zone_dataset_t *zd, *zd2;
|
||||
struct prison *pr;
|
||||
int dofree, error;
|
||||
|
||||
if ((error = spl_priv_check_cred(cred, PRIV_ZFS_JAIL)) != 0)
|
||||
return (error);
|
||||
|
||||
/* Allocate memory before we grab prison's mutex. */
|
||||
zd = malloc(sizeof (*zd) + strlen(dataset) + 1, M_ZONES, M_WAITOK);
|
||||
|
||||
sx_slock(&allprison_lock);
|
||||
pr = prison_find(jailid); /* Locks &pr->pr_mtx. */
|
||||
sx_sunlock(&allprison_lock);
|
||||
if (pr == NULL) {
|
||||
free(zd, M_ZONES);
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
head = osd_jail_get(pr, zone_slot);
|
||||
if (head != NULL) {
|
||||
dofree = 0;
|
||||
LIST_FOREACH(zd2, head, zd_next) {
|
||||
if (strcmp(dataset, zd2->zd_dataset) == 0) {
|
||||
free(zd, M_ZONES);
|
||||
error = EEXIST;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
dofree = 1;
|
||||
prison_hold_locked(pr);
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
head = malloc(sizeof (*head), M_ZONES, M_WAITOK);
|
||||
LIST_INIT(head);
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
error = osd_jail_set(pr, zone_slot, head);
|
||||
KASSERT(error == 0, ("osd_jail_set() failed (error=%d)",
|
||||
error));
|
||||
}
|
||||
strcpy(zd->zd_dataset, dataset);
|
||||
LIST_INSERT_HEAD(head, zd, zd_next);
|
||||
end:
|
||||
if (dofree)
|
||||
prison_free_locked(pr);
|
||||
else
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
zone_dataset_detach(struct ucred *cred, const char *dataset, int jailid)
|
||||
{
|
||||
struct zone_dataset_head *head;
|
||||
zone_dataset_t *zd;
|
||||
struct prison *pr;
|
||||
int error;
|
||||
|
||||
if ((error = spl_priv_check_cred(cred, PRIV_ZFS_JAIL)) != 0)
|
||||
return (error);
|
||||
|
||||
sx_slock(&allprison_lock);
|
||||
pr = prison_find(jailid);
|
||||
sx_sunlock(&allprison_lock);
|
||||
if (pr == NULL)
|
||||
return (ENOENT);
|
||||
head = osd_jail_get(pr, zone_slot);
|
||||
if (head == NULL) {
|
||||
error = ENOENT;
|
||||
goto end;
|
||||
}
|
||||
LIST_FOREACH(zd, head, zd_next) {
|
||||
if (strcmp(dataset, zd->zd_dataset) == 0)
|
||||
break;
|
||||
}
|
||||
if (zd == NULL)
|
||||
error = ENOENT;
|
||||
else {
|
||||
LIST_REMOVE(zd, zd_next);
|
||||
free(zd, M_ZONES);
|
||||
if (LIST_EMPTY(head))
|
||||
osd_jail_del(pr, zone_slot);
|
||||
error = 0;
|
||||
}
|
||||
end:
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if the named dataset is visible in the current zone.
|
||||
* The 'write' parameter is set to 1 if the dataset is also writable.
|
||||
*/
|
||||
int
|
||||
zone_dataset_visible(const char *dataset, int *write)
|
||||
{
|
||||
struct zone_dataset_head *head;
|
||||
zone_dataset_t *zd;
|
||||
struct prison *pr;
|
||||
size_t len;
|
||||
int ret = 0;
|
||||
|
||||
if (dataset[0] == '\0')
|
||||
return (0);
|
||||
if (INGLOBALZONE(curproc)) {
|
||||
if (write != NULL)
|
||||
*write = 1;
|
||||
return (1);
|
||||
}
|
||||
pr = curthread->td_ucred->cr_prison;
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
head = osd_jail_get(pr, zone_slot);
|
||||
if (head == NULL)
|
||||
goto end;
|
||||
|
||||
/*
|
||||
* Walk the list once, looking for datasets which match exactly, or
|
||||
* specify a dataset underneath an exported dataset. If found, return
|
||||
* true and note that it is writable.
|
||||
*/
|
||||
LIST_FOREACH(zd, head, zd_next) {
|
||||
len = strlen(zd->zd_dataset);
|
||||
if (strlen(dataset) >= len &&
|
||||
bcmp(dataset, zd->zd_dataset, len) == 0 &&
|
||||
(dataset[len] == '\0' || dataset[len] == '/' ||
|
||||
dataset[len] == '@')) {
|
||||
if (write)
|
||||
*write = 1;
|
||||
ret = 1;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk the list a second time, searching for datasets which are parents
|
||||
* of exported datasets. These should be visible, but read-only.
|
||||
*
|
||||
* Note that we also have to support forms such as 'pool/dataset/', with
|
||||
* a trailing slash.
|
||||
*/
|
||||
LIST_FOREACH(zd, head, zd_next) {
|
||||
len = strlen(dataset);
|
||||
if (dataset[len - 1] == '/')
|
||||
len--; /* Ignore trailing slash */
|
||||
if (len < strlen(zd->zd_dataset) &&
|
||||
bcmp(dataset, zd->zd_dataset, len) == 0 &&
|
||||
zd->zd_dataset[len] == '/') {
|
||||
if (write)
|
||||
*write = 0;
|
||||
ret = 1;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
end:
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static void
|
||||
zone_destroy(void *arg)
|
||||
{
|
||||
struct zone_dataset_head *head;
|
||||
zone_dataset_t *zd;
|
||||
|
||||
head = arg;
|
||||
while ((zd = LIST_FIRST(head)) != NULL) {
|
||||
LIST_REMOVE(zd, zd_next);
|
||||
free(zd, M_ZONES);
|
||||
}
|
||||
free(head, M_ZONES);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
zone_get_hostid(void *ptr)
|
||||
{
|
||||
|
||||
KASSERT(ptr == NULL, ("only NULL pointer supported in %s", __func__));
|
||||
|
||||
return ((uint32_t)curthread->td_ucred->cr_prison->pr_hostid);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
in_globalzone(struct proc *p)
|
||||
{
|
||||
return (!jailed(FIRST_THREAD_IN_PROC((p))->td_ucred));
|
||||
}
|
||||
|
||||
static void
|
||||
zone_sysinit(void *arg __unused)
|
||||
{
|
||||
|
||||
zone_slot = osd_jail_register(zone_destroy, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
zone_sysuninit(void *arg __unused)
|
||||
{
|
||||
|
||||
osd_jail_deregister(zone_slot);
|
||||
}
|
||||
|
||||
/* Hook the slot registration and its teardown into SI_SUB_DRIVERS. */
SYSINIT(zone_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY,
    zone_sysinit, NULL);
SYSUNINIT(zone_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY,
    zone_sysuninit, NULL);
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,245 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/zio_compress.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/vdev_trim.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/multilist.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/zil.h>
|
||||
#include <sys/fm/fs/zfs.h>
|
||||
#include <sys/eventhandler.h>
|
||||
#include <sys/callb.h>
|
||||
#include <sys/kstat.h>
|
||||
#include <sys/zthr.h>
|
||||
#include <zfs_fletcher.h>
|
||||
#include <sys/arc_impl.h>
|
||||
#include <sys/sdt.h>
|
||||
#include <sys/aggsum.h>
|
||||
#include <cityhash.h>
|
||||
|
||||
extern struct vfsops zfs_vfsops;
|
||||
|
||||
/* vmem_size typemask */
|
||||
#define VMEM_ALLOC 0x01
|
||||
#define VMEM_FREE 0x02
|
||||
#define VMEM_MAXFREE 0x10
|
||||
typedef size_t vmem_size_t;
|
||||
extern vmem_size_t vmem_size(vmem_t *vm, int typemask);
|
||||
|
||||
uint_t zfs_arc_free_target = 0;
|
||||
|
||||
int64_t last_free_memory;
|
||||
free_memory_reason_t last_free_reason;
|
||||
|
||||
int64_t
|
||||
arc_available_memory(void)
|
||||
{
|
||||
int64_t lowest = INT64_MAX;
|
||||
int64_t n __unused;
|
||||
free_memory_reason_t r = FMR_UNKNOWN;
|
||||
|
||||
#ifdef _KERNEL
|
||||
/*
|
||||
* Cooperate with pagedaemon when it's time for it to scan
|
||||
* and reclaim some pages.
|
||||
*/
|
||||
n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target);
|
||||
if (n < lowest) {
|
||||
lowest = n;
|
||||
r = FMR_LOTSFREE;
|
||||
}
|
||||
#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
|
||||
/*
|
||||
* If we're on an i386 platform, it's possible that we'll exhaust the
|
||||
* kernel heap space before we ever run out of available physical
|
||||
* memory. Most checks of the size of the heap_area compare against
|
||||
* tune.t_minarmem, which is the minimum available real memory that we
|
||||
* can have in the system. However, this is generally fixed at 25 pages
|
||||
* which is so low that it's useless. In this comparison, we seek to
|
||||
* calculate the total heap-size, and reclaim if more than 3/4ths of the
|
||||
* heap is allocated. (Or, in the calculation, if less than 1/4th is
|
||||
* free)
|
||||
*/
|
||||
n = uma_avail() - (long)(uma_limit() / 4);
|
||||
if (n < lowest) {
|
||||
lowest = n;
|
||||
r = FMR_HEAP_ARENA;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If zio data pages are being allocated out of a separate heap segment,
|
||||
* then enforce that the size of available vmem for this arena remains
|
||||
* above about 1/4th (1/(2^arc_zio_arena_free_shift)) free.
|
||||
*
|
||||
* Note that reducing the arc_zio_arena_free_shift keeps more virtual
|
||||
* memory (in the zio_arena) free, which can avoid memory
|
||||
* fragmentation issues.
|
||||
*/
|
||||
if (zio_arena != NULL) {
|
||||
n = (int64_t)vmem_size(zio_arena, VMEM_FREE) -
|
||||
(vmem_size(zio_arena, VMEM_ALLOC) >>
|
||||
arc_zio_arena_free_shift);
|
||||
if (n < lowest) {
|
||||
lowest = n;
|
||||
r = FMR_ZIO_ARENA;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* _KERNEL */
|
||||
/* Every 100 calls, free a small amount */
|
||||
if (spa_get_random(100) == 0)
|
||||
lowest = -1024;
|
||||
#endif /* _KERNEL */
|
||||
|
||||
last_free_memory = lowest;
|
||||
last_free_reason = r;
|
||||
DTRACE_PROBE2(arc__available_memory, int64_t, lowest, int, r);
|
||||
return (lowest);
|
||||
}
|
||||
|
||||
/*
 * Pick a default ARC size limit for a machine with allmem bytes of
 * memory: the larger of 5/8 of allmem and (allmem - 1 GiB).  When allmem
 * is below 1 GiB, min takes the place of the second candidate.
 */
uint64_t
arc_default_max(uint64_t min, uint64_t allmem)
{
	const uint64_t one_gib = 1ULL << 30;
	const uint64_t five_eighths = allmem * 5 / 8;
	const uint64_t candidate =
	    (allmem < one_gib) ? min : allmem - one_gib;

	return ((candidate > five_eighths) ? candidate : five_eighths);
}
|
||||
|
||||
/*
|
||||
* Helper function for arc_prune_async() it is responsible for safely
|
||||
* handling the execution of a registered arc_prune_func_t.
|
||||
*/
|
||||
static void
|
||||
arc_prune_task(void *arg)
|
||||
{
|
||||
int64_t nr_scan = *(int64_t *)arg;
|
||||
|
||||
arc_reduce_target_size(ptob(nr_scan));
|
||||
free(arg, M_TEMP);
|
||||
vnlru_free(nr_scan, &zfs_vfsops);
|
||||
}
|
||||
|
||||
/*
|
||||
* Notify registered consumers they must drop holds on a portion of the ARC
|
||||
* buffered they reference. This provides a mechanism to ensure the ARC can
|
||||
* honor the arc_meta_limit and reclaim otherwise pinned ARC buffers. This
|
||||
* is analogous to dnlc_reduce_cache() but more generic.
|
||||
*
|
||||
* This operation is performed asynchronously so it may be safely called
|
||||
* in the context of the arc_reclaim_thread(). A reference is taken here
|
||||
* for each registered arc_prune_t and the arc_prune_task() is responsible
|
||||
* for releasing it once the registered arc_prune_func_t has completed.
|
||||
*/
|
||||
void
|
||||
arc_prune_async(int64_t adjust)
|
||||
{
|
||||
|
||||
int64_t *adjustptr;
|
||||
|
||||
if ((adjustptr = malloc(sizeof (int64_t), M_TEMP, M_NOWAIT)) == NULL)
|
||||
return;
|
||||
|
||||
*adjustptr = adjust;
|
||||
taskq_dispatch(arc_prune_taskq, arc_prune_task, adjustptr, TQ_SLEEP);
|
||||
ARCSTAT_BUMP(arcstat_prune);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
arc_all_memory(void)
|
||||
{
|
||||
return ((uint64_t)ptob(physmem));
|
||||
}
|
||||
|
||||
int
|
||||
arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Placeholder (marked XXX in the original): always reports 0 bytes free.
 */
uint64_t
arc_free_memory(void)
{
	const uint64_t free_bytes = 0;	/* XXX */

	return (free_bytes);
}
|
||||
|
||||
static eventhandler_tag arc_event_lowmem = NULL;
|
||||
|
||||
static void
|
||||
arc_lowmem(void *arg __unused, int howto __unused)
|
||||
{
|
||||
int64_t free_memory, to_free;
|
||||
|
||||
arc_no_grow = B_TRUE;
|
||||
arc_warm = B_TRUE;
|
||||
arc_growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
|
||||
free_memory = arc_available_memory();
|
||||
to_free = (arc_c >> arc_shrink_shift) - MIN(free_memory, 0);
|
||||
DTRACE_PROBE2(arc__needfree, int64_t, free_memory, int64_t, to_free);
|
||||
arc_reduce_target_size(to_free);
|
||||
|
||||
mutex_enter(&arc_adjust_lock);
|
||||
arc_adjust_needed = B_TRUE;
|
||||
zthr_wakeup(arc_adjust_zthr);
|
||||
|
||||
/*
|
||||
* It is unsafe to block here in arbitrary threads, because we can come
|
||||
* here from ARC itself and may hold ARC locks and thus risk a deadlock
|
||||
* with ARC reclaim thread.
|
||||
*/
|
||||
if (curproc == pageproc)
|
||||
(void) cv_wait(&arc_adjust_waiters_cv, &arc_adjust_lock);
|
||||
mutex_exit(&arc_adjust_lock);
|
||||
}
|
||||
|
||||
void
|
||||
arc_lowmem_init(void)
|
||||
{
|
||||
arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
|
||||
EVENTHANDLER_PRI_FIRST);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
arc_lowmem_fini(void)
|
||||
{
|
||||
if (arc_event_lowmem != NULL)
|
||||
EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
|
||||
}
|
||||
@@ -0,0 +1,613 @@
|
||||
/*
|
||||
* Copyright (c) 2005-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
* Copyright (c) 2018 Sean Eric Fagan <sef@ixsystems.com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* Portions of this file are derived from sys/geom/eli/g_eli_hmac.c
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/errno.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/libkern.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <opencrypto/cryptodev.h>
|
||||
#include <opencrypto/xform.h>
|
||||
#else
|
||||
#include <strings.h>
|
||||
#endif
|
||||
|
||||
#include <sys/zio_crypt.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zio.h>
|
||||
|
||||
#include <sys/freebsd_crypto.h>
|
||||
|
||||
#define SHA512_HMAC_BLOCK_SIZE 128
|
||||
|
||||
static int crypt_sessions = 0;
|
||||
SYSCTL_DECL(_vfs_zfs);
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, crypt_sessions, CTLFLAG_RD,
|
||||
&crypt_sessions, 0, "Number of cryptographic sessions created");
|
||||
|
||||
void
|
||||
crypto_mac_init(struct hmac_ctx *ctx, const crypto_key_t *c_key)
|
||||
{
|
||||
uint8_t k_ipad[SHA512_HMAC_BLOCK_SIZE],
|
||||
k_opad[SHA512_HMAC_BLOCK_SIZE],
|
||||
key[SHA512_HMAC_BLOCK_SIZE];
|
||||
SHA512_CTX lctx;
|
||||
int i;
|
||||
size_t cl_bytes = CRYPTO_BITS2BYTES(c_key->ck_length);
|
||||
|
||||
/*
|
||||
* This code is based on the similar code in geom/eli/g_eli_hmac.c
|
||||
*/
|
||||
explicit_bzero(key, sizeof (key));
|
||||
if (c_key->ck_length == 0)
|
||||
/* do nothing */;
|
||||
else if (cl_bytes <= SHA512_HMAC_BLOCK_SIZE)
|
||||
bcopy(c_key->ck_data, key, cl_bytes);
|
||||
else {
|
||||
/*
|
||||
* If key is longer than 128 bytes reset it to
|
||||
* key = SHA512(key).
|
||||
*/
|
||||
SHA512_Init(&lctx);
|
||||
SHA512_Update(&lctx, c_key->ck_data, cl_bytes);
|
||||
SHA512_Final(key, &lctx);
|
||||
}
|
||||
|
||||
/* XOR key with ipad and opad values. */
|
||||
for (i = 0; i < sizeof (key); i++) {
|
||||
k_ipad[i] = key[i] ^ 0x36;
|
||||
k_opad[i] = key[i] ^ 0x5c;
|
||||
}
|
||||
explicit_bzero(key, sizeof (key));
|
||||
|
||||
/* Start inner SHA512. */
|
||||
SHA512_Init(&ctx->innerctx);
|
||||
SHA512_Update(&ctx->innerctx, k_ipad, sizeof (k_ipad));
|
||||
explicit_bzero(k_ipad, sizeof (k_ipad));
|
||||
/* Start outer SHA512. */
|
||||
SHA512_Init(&ctx->outerctx);
|
||||
SHA512_Update(&ctx->outerctx, k_opad, sizeof (k_opad));
|
||||
explicit_bzero(k_opad, sizeof (k_opad));
|
||||
}
|
||||
|
||||
void
|
||||
crypto_mac_update(struct hmac_ctx *ctx, const void *data, size_t datasize)
|
||||
{
|
||||
SHA512_Update(&ctx->innerctx, data, datasize);
|
||||
}
|
||||
|
||||
void
|
||||
crypto_mac_final(struct hmac_ctx *ctx, void *md, size_t mdsize)
|
||||
{
|
||||
uint8_t digest[SHA512_DIGEST_LENGTH];
|
||||
|
||||
/* Complete inner hash */
|
||||
SHA512_Final(digest, &ctx->innerctx);
|
||||
|
||||
/* Complete outer hash */
|
||||
SHA512_Update(&ctx->outerctx, digest, sizeof (digest));
|
||||
SHA512_Final(digest, &ctx->outerctx);
|
||||
|
||||
explicit_bzero(ctx, sizeof (*ctx));
|
||||
/* mdsize == 0 means "Give me the whole hash!" */
|
||||
if (mdsize == 0)
|
||||
mdsize = SHA512_DIGEST_LENGTH;
|
||||
bcopy(digest, md, mdsize);
|
||||
explicit_bzero(digest, sizeof (digest));
|
||||
}
|
||||
|
||||
void
|
||||
crypto_mac(const crypto_key_t *key, const void *in_data, size_t in_data_size,
|
||||
void *out_data, size_t out_data_size)
|
||||
{
|
||||
struct hmac_ctx ctx;
|
||||
|
||||
crypto_mac_init(&ctx, key);
|
||||
crypto_mac_update(&ctx, in_data, in_data_size);
|
||||
crypto_mac_final(&ctx, out_data, out_data_size);
|
||||
}
|
||||
|
||||
static int
|
||||
freebsd_zfs_crypt_done(struct cryptop *crp)
|
||||
{
|
||||
freebsd_crypt_session_t *ses;
|
||||
|
||||
ses = crp->crp_opaque;
|
||||
mtx_lock(&ses->fs_lock);
|
||||
ses->fs_done = true;
|
||||
mtx_unlock(&ses->fs_lock);
|
||||
wakeup(crp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
freebsd_crypt_freesession(freebsd_crypt_session_t *sess)
|
||||
{
|
||||
mtx_destroy(&sess->fs_lock);
|
||||
crypto_freesession(sess->fs_sid);
|
||||
explicit_bzero(sess, sizeof (*sess));
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp)
|
||||
{
|
||||
int error;
|
||||
|
||||
crp->crp_opaque = session;
|
||||
crp->crp_callback = freebsd_zfs_crypt_done;
|
||||
for (;;) {
|
||||
error = crypto_dispatch(crp);
|
||||
if (error)
|
||||
break;
|
||||
mtx_lock(&session->fs_lock);
|
||||
while (session->fs_done == false)
|
||||
msleep(crp, &session->fs_lock, PRIBIO,
|
||||
"zfs_crypto", hz/5);
|
||||
mtx_unlock(&session->fs_lock);
|
||||
|
||||
if (crp->crp_etype != EAGAIN) {
|
||||
error = crp->crp_etype;
|
||||
break;
|
||||
}
|
||||
crp->crp_etype = 0;
|
||||
crp->crp_flags &= ~CRYPTO_F_DONE;
|
||||
session->fs_done = false;
|
||||
#if __FreeBSD_version < 1300087
|
||||
/*
|
||||
* Session ID changed, so we should record that,
|
||||
* and try again
|
||||
*/
|
||||
session->fs_sid = crp->crp_session;
|
||||
#endif
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
static void
|
||||
freebsd_crypt_uio_debug_log(boolean_t encrypt,
|
||||
freebsd_crypt_session_t *input_sessionp,
|
||||
struct zio_crypt_info *c_info,
|
||||
uio_t *data_uio,
|
||||
crypto_key_t *key,
|
||||
uint8_t *ivbuf,
|
||||
size_t datalen,
|
||||
size_t auth_len)
|
||||
{
|
||||
#ifdef FCRYPTO_DEBUG
|
||||
struct cryptodesc *crd;
|
||||
uint8_t *p = NULL;
|
||||
size_t total = 0;
|
||||
|
||||
printf("%s(%s, %p, { %s, %d, %d, %s }, %p, { %d, %p, %u }, "
|
||||
"%p, %u, %u)\n",
|
||||
__FUNCTION__, encrypt ? "encrypt" : "decrypt", input_sessionp,
|
||||
c_info->ci_algname, c_info->ci_crypt_type,
|
||||
(unsigned int)c_info->ci_keylen, c_info->ci_name,
|
||||
data_uio, key->ck_format, key->ck_data,
|
||||
(unsigned int)key->ck_length,
|
||||
ivbuf, (unsigned int)datalen, (unsigned int)auth_len);
|
||||
printf("\tkey = { ");
|
||||
for (int i = 0; i < key->ck_length / 8; i++) {
|
||||
uint8_t *b = (uint8_t *)key->ck_data;
|
||||
printf("%02x ", b[i]);
|
||||
}
|
||||
printf("}\n");
|
||||
for (int i = 0; i < data_uio->uio_iovcnt; i++) {
|
||||
printf("\tiovec #%d: <%p, %u>\n", i,
|
||||
data_uio->uio_iov[i].iov_base,
|
||||
(unsigned int)data_uio->uio_iov[i].iov_len);
|
||||
total += data_uio->uio_iov[i].iov_len;
|
||||
}
|
||||
data_uio->uio_resid = total;
|
||||
#endif
|
||||
}
|
||||
/*
|
||||
* Create a new cryptographic session. This should
|
||||
* happen every time the key changes (including when
|
||||
* it's first loaded).
|
||||
*/
|
||||
#if __FreeBSD_version >= 1300087
|
||||
int
|
||||
freebsd_crypt_newsession(freebsd_crypt_session_t *sessp,
|
||||
struct zio_crypt_info *c_info, crypto_key_t *key)
|
||||
{
|
||||
struct crypto_session_params csp;
|
||||
int error = 0;
|
||||
|
||||
#ifdef FCRYPTO_DEBUG
|
||||
printf("%s(%p, { %s, %d, %d, %s }, { %d, %p, %u })\n",
|
||||
__FUNCTION__, sessp,
|
||||
c_info->ci_algname, c_info->ci_crypt_type,
|
||||
(unsigned int)c_info->ci_keylen, c_info->ci_name,
|
||||
key->ck_format, key->ck_data, (unsigned int)key->ck_length);
|
||||
printf("\tkey = { ");
|
||||
for (int i = 0; i < key->ck_length / 8; i++) {
|
||||
uint8_t *b = (uint8_t *)key->ck_data;
|
||||
printf("%02x ", b[i]);
|
||||
}
|
||||
printf("}\n");
|
||||
#endif
|
||||
bzero(&csp, sizeof (csp));
|
||||
csp.csp_mode = CSP_MODE_AEAD;
|
||||
csp.csp_cipher_key = key->ck_data;
|
||||
csp.csp_cipher_klen = key->ck_length / 8;
|
||||
switch (c_info->ci_crypt_type) {
|
||||
case ZC_TYPE_GCM:
|
||||
csp.csp_cipher_alg = CRYPTO_AES_NIST_GCM_16;
|
||||
csp.csp_ivlen = AES_GCM_IV_LEN;
|
||||
switch (key->ck_length/8) {
|
||||
case AES_128_GMAC_KEY_LEN:
|
||||
case AES_192_GMAC_KEY_LEN:
|
||||
case AES_256_GMAC_KEY_LEN:
|
||||
break;
|
||||
default:
|
||||
error = EINVAL;
|
||||
goto bad;
|
||||
}
|
||||
break;
|
||||
case ZC_TYPE_CCM:
|
||||
csp.csp_cipher_alg = CRYPTO_AES_CCM_16;
|
||||
csp.csp_ivlen = AES_CCM_IV_LEN;
|
||||
switch (key->ck_length/8) {
|
||||
case AES_128_CBC_MAC_KEY_LEN:
|
||||
case AES_192_CBC_MAC_KEY_LEN:
|
||||
case AES_256_CBC_MAC_KEY_LEN:
|
||||
break;
|
||||
default:
|
||||
error = EINVAL;
|
||||
goto bad;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
error = ENOTSUP;
|
||||
goto bad;
|
||||
}
|
||||
error = crypto_newsession(&sessp->fs_sid, &csp,
|
||||
CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE);
|
||||
mtx_init(&sessp->fs_lock, "FreeBSD Cryptographic Session Lock",
|
||||
NULL, MTX_DEF);
|
||||
crypt_sessions++;
|
||||
bad:
|
||||
#ifdef FCRYPTO_DEBUG
|
||||
if (error)
|
||||
printf("%s: returning error %d\n", __FUNCTION__, error);
|
||||
#endif
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
freebsd_crypt_uio(boolean_t encrypt,
|
||||
freebsd_crypt_session_t *input_sessionp,
|
||||
struct zio_crypt_info *c_info,
|
||||
uio_t *data_uio,
|
||||
crypto_key_t *key,
|
||||
uint8_t *ivbuf,
|
||||
size_t datalen,
|
||||
size_t auth_len)
|
||||
{
|
||||
struct cryptop *crp;
|
||||
freebsd_crypt_session_t *session = NULL;
|
||||
int error = 0;
|
||||
size_t total = 0;
|
||||
|
||||
freebsd_crypt_uio_debug_log(encrypt, input_sessionp, c_info, data_uio,
|
||||
key, ivbuf, datalen, auth_len);
|
||||
for (int i = 0; i < data_uio->uio_iovcnt; i++)
|
||||
total += data_uio->uio_iov[i].iov_len;
|
||||
data_uio->uio_resid = total;
|
||||
if (input_sessionp == NULL) {
|
||||
session = kmem_zalloc(sizeof (*session), KM_SLEEP);
|
||||
error = freebsd_crypt_newsession(session, c_info, key);
|
||||
if (error)
|
||||
goto out;
|
||||
} else
|
||||
session = input_sessionp;
|
||||
|
||||
crp = crypto_getreq(session->fs_sid, M_WAITOK);
|
||||
if (encrypt) {
|
||||
crp->crp_op = CRYPTO_OP_ENCRYPT |
|
||||
CRYPTO_OP_COMPUTE_DIGEST;
|
||||
} else {
|
||||
crp->crp_op = CRYPTO_OP_DECRYPT |
|
||||
CRYPTO_OP_VERIFY_DIGEST;
|
||||
}
|
||||
crp->crp_flags = CRYPTO_F_CBIFSYNC | CRYPTO_F_IV_SEPARATE;
|
||||
crp->crp_buf_type = CRYPTO_BUF_UIO;
|
||||
crp->crp_uio = (void*)data_uio;
|
||||
crp->crp_ilen = data_uio->uio_resid;
|
||||
|
||||
crp->crp_aad_start = 0;
|
||||
crp->crp_aad_length = auth_len;
|
||||
crp->crp_payload_start = auth_len;
|
||||
crp->crp_payload_length = datalen;
|
||||
crp->crp_digest_start = auth_len + datalen;
|
||||
|
||||
bcopy(ivbuf, crp->crp_iv, ZIO_DATA_IV_LEN);
|
||||
error = zfs_crypto_dispatch(session, crp);
|
||||
crypto_freereq(crp);
|
||||
out:
|
||||
#ifdef FCRYPTO_DEBUG
|
||||
if (error)
|
||||
printf("%s: returning error %d\n", __FUNCTION__, error);
|
||||
#endif
|
||||
if (input_sessionp == NULL) {
|
||||
freebsd_crypt_freesession(session);
|
||||
kmem_free(session, sizeof (*session));
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
#else
|
||||
int
|
||||
freebsd_crypt_newsession(freebsd_crypt_session_t *sessp,
|
||||
struct zio_crypt_info *c_info, crypto_key_t *key)
|
||||
{
|
||||
struct cryptoini cria, crie, *crip;
|
||||
struct enc_xform *xform;
|
||||
struct auth_hash *xauth;
|
||||
int error = 0;
|
||||
crypto_session_t sid;
|
||||
|
||||
#ifdef FCRYPTO_DEBUG
|
||||
printf("%s(%p, { %s, %d, %d, %s }, { %d, %p, %u })\n",
|
||||
__FUNCTION__, sessp,
|
||||
c_info->ci_algname, c_info->ci_crypt_type,
|
||||
(unsigned int)c_info->ci_keylen, c_info->ci_name,
|
||||
key->ck_format, key->ck_data, (unsigned int)key->ck_length);
|
||||
printf("\tkey = { ");
|
||||
for (int i = 0; i < key->ck_length / 8; i++) {
|
||||
uint8_t *b = (uint8_t *)key->ck_data;
|
||||
printf("%02x ", b[i]);
|
||||
}
|
||||
printf("}\n");
|
||||
#endif
|
||||
switch (c_info->ci_crypt_type) {
|
||||
case ZC_TYPE_GCM:
|
||||
xform = &enc_xform_aes_nist_gcm;
|
||||
switch (key->ck_length/8) {
|
||||
case AES_128_GMAC_KEY_LEN:
|
||||
xauth = &auth_hash_nist_gmac_aes_128;
|
||||
break;
|
||||
case AES_192_GMAC_KEY_LEN:
|
||||
xauth = &auth_hash_nist_gmac_aes_192;
|
||||
break;
|
||||
case AES_256_GMAC_KEY_LEN:
|
||||
xauth = &auth_hash_nist_gmac_aes_256;
|
||||
break;
|
||||
default:
|
||||
error = EINVAL;
|
||||
goto bad;
|
||||
}
|
||||
break;
|
||||
case ZC_TYPE_CCM:
|
||||
xform = &enc_xform_ccm;
|
||||
switch (key->ck_length/8) {
|
||||
case AES_128_CBC_MAC_KEY_LEN:
|
||||
xauth = &auth_hash_ccm_cbc_mac_128;
|
||||
break;
|
||||
case AES_192_CBC_MAC_KEY_LEN:
|
||||
xauth = &auth_hash_ccm_cbc_mac_192;
|
||||
break;
|
||||
case AES_256_CBC_MAC_KEY_LEN:
|
||||
xauth = &auth_hash_ccm_cbc_mac_256;
|
||||
break;
|
||||
default:
|
||||
error = EINVAL;
|
||||
goto bad;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
error = ENOTSUP;
|
||||
goto bad;
|
||||
}
|
||||
#ifdef FCRYPTO_DEBUG
|
||||
printf("%s(%d): Using crypt %s (key length %u [%u bytes]), "
|
||||
"auth %s (key length %d)\n",
|
||||
__FUNCTION__, __LINE__,
|
||||
xform->name, (unsigned int)key->ck_length,
|
||||
(unsigned int)key->ck_length/8,
|
||||
xauth->name, xauth->keysize);
|
||||
#endif
|
||||
|
||||
bzero(&crie, sizeof (crie));
|
||||
bzero(&cria, sizeof (cria));
|
||||
|
||||
crie.cri_alg = xform->type;
|
||||
crie.cri_key = key->ck_data;
|
||||
crie.cri_klen = key->ck_length;
|
||||
|
||||
cria.cri_alg = xauth->type;
|
||||
cria.cri_key = key->ck_data;
|
||||
cria.cri_klen = key->ck_length;
|
||||
|
||||
cria.cri_next = &crie;
|
||||
crie.cri_next = NULL;
|
||||
crip = &cria;
|
||||
// Everything else is bzero'd
|
||||
|
||||
error = crypto_newsession(&sid, crip,
|
||||
CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE);
|
||||
if (error != 0) {
|
||||
printf("%s(%d): crypto_newsession failed with %d\n",
|
||||
__FUNCTION__, __LINE__, error);
|
||||
goto bad;
|
||||
}
|
||||
sessp->fs_sid = sid;
|
||||
mtx_init(&sessp->fs_lock, "FreeBSD Cryptographic Session Lock",
|
||||
NULL, MTX_DEF);
|
||||
crypt_sessions++;
|
||||
bad:
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* The meat of encryption/decryption.
|
||||
* If sessp is NULL, then it will create a
|
||||
* temporary cryptographic session, and release
|
||||
* it when done.
|
||||
*/
|
||||
int
|
||||
freebsd_crypt_uio(boolean_t encrypt,
|
||||
freebsd_crypt_session_t *input_sessionp,
|
||||
struct zio_crypt_info *c_info,
|
||||
uio_t *data_uio,
|
||||
crypto_key_t *key,
|
||||
uint8_t *ivbuf,
|
||||
size_t datalen,
|
||||
size_t auth_len)
|
||||
{
|
||||
struct cryptop *crp;
|
||||
struct cryptodesc *enc_desc, *auth_desc;
|
||||
struct enc_xform *xform;
|
||||
struct auth_hash *xauth;
|
||||
freebsd_crypt_session_t *session = NULL;
|
||||
int error;
|
||||
|
||||
freebsd_crypt_uio_debug_log(encrypt, input_sessionp, c_info, data_uio,
|
||||
key, ivbuf, datalen, auth_len);
|
||||
switch (c_info->ci_crypt_type) {
|
||||
case ZC_TYPE_GCM:
|
||||
xform = &enc_xform_aes_nist_gcm;
|
||||
switch (key->ck_length/8) {
|
||||
case AES_128_GMAC_KEY_LEN:
|
||||
xauth = &auth_hash_nist_gmac_aes_128;
|
||||
break;
|
||||
case AES_192_GMAC_KEY_LEN:
|
||||
xauth = &auth_hash_nist_gmac_aes_192;
|
||||
break;
|
||||
case AES_256_GMAC_KEY_LEN:
|
||||
xauth = &auth_hash_nist_gmac_aes_256;
|
||||
break;
|
||||
default:
|
||||
error = EINVAL;
|
||||
goto bad;
|
||||
}
|
||||
break;
|
||||
case ZC_TYPE_CCM:
|
||||
xform = &enc_xform_ccm;
|
||||
switch (key->ck_length/8) {
|
||||
case AES_128_CBC_MAC_KEY_LEN:
|
||||
xauth = &auth_hash_ccm_cbc_mac_128;
|
||||
break;
|
||||
case AES_192_CBC_MAC_KEY_LEN:
|
||||
xauth = &auth_hash_ccm_cbc_mac_192;
|
||||
break;
|
||||
case AES_256_CBC_MAC_KEY_LEN:
|
||||
xauth = &auth_hash_ccm_cbc_mac_256;
|
||||
break;
|
||||
default:
|
||||
error = EINVAL;
|
||||
goto bad;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
error = ENOTSUP;
|
||||
goto bad;
|
||||
}
|
||||
|
||||
#ifdef FCRYPTO_DEBUG
|
||||
printf("%s(%d): Using crypt %s (key length %u [%u bytes]), "
|
||||
"auth %s (key length %d)\n",
|
||||
__FUNCTION__, __LINE__,
|
||||
xform->name, (unsigned int)key->ck_length,
|
||||
(unsigned int)key->ck_length/8,
|
||||
xauth->name, xauth->keysize);
|
||||
#endif
|
||||
|
||||
if (input_sessionp == NULL) {
|
||||
session = kmem_zalloc(sizeof (*session), KM_SLEEP);
|
||||
error = freebsd_crypt_newsession(session, c_info, key);
|
||||
if (error)
|
||||
goto out;
|
||||
} else
|
||||
session = input_sessionp;
|
||||
|
||||
crp = crypto_getreq(2);
|
||||
if (crp == NULL) {
|
||||
error = ENOMEM;
|
||||
goto bad;
|
||||
}
|
||||
|
||||
auth_desc = crp->crp_desc;
|
||||
enc_desc = auth_desc->crd_next;
|
||||
|
||||
crp->crp_session = session->fs_sid;
|
||||
crp->crp_ilen = auth_len + datalen;
|
||||
crp->crp_buf = (void*)data_uio;
|
||||
crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIFSYNC;
|
||||
|
||||
auth_desc->crd_skip = 0;
|
||||
auth_desc->crd_len = auth_len;
|
||||
auth_desc->crd_inject = auth_len + datalen;
|
||||
auth_desc->crd_alg = xauth->type;
|
||||
#ifdef FCRYPTO_DEBUG
|
||||
printf("%s: auth: skip = %u, len = %u, inject = %u\n",
|
||||
__FUNCTION__, auth_desc->crd_skip, auth_desc->crd_len,
|
||||
auth_desc->crd_inject);
|
||||
#endif
|
||||
|
||||
enc_desc->crd_skip = auth_len;
|
||||
enc_desc->crd_len = datalen;
|
||||
enc_desc->crd_inject = auth_len;
|
||||
enc_desc->crd_alg = xform->type;
|
||||
enc_desc->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT;
|
||||
bcopy(ivbuf, enc_desc->crd_iv, ZIO_DATA_IV_LEN);
|
||||
enc_desc->crd_next = NULL;
|
||||
|
||||
#ifdef FCRYPTO_DEBUG
|
||||
printf("%s: enc: skip = %u, len = %u, inject = %u\n",
|
||||
__FUNCTION__, enc_desc->crd_skip, enc_desc->crd_len,
|
||||
enc_desc->crd_inject);
|
||||
#endif
|
||||
|
||||
if (encrypt)
|
||||
enc_desc->crd_flags |= CRD_F_ENCRYPT;
|
||||
|
||||
error = zfs_crypto_dispatch(session, crp);
|
||||
crypto_freereq(crp);
|
||||
out:
|
||||
if (input_sessionp == NULL) {
|
||||
freebsd_crypt_freesession(session);
|
||||
kmem_free(session, sizeof (*session));
|
||||
}
|
||||
bad:
|
||||
#ifdef FCRYPTO_DEBUG
|
||||
if (error)
|
||||
printf("%s: returning error %d\n", __FUNCTION__, error);
|
||||
#endif
|
||||
return (error);
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,346 @@
|
||||
/*
|
||||
* Copyright (c) 2020 iXsystems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dmu_impl.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/dbuf.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_traverse.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/dsl_dir.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/dsl_prop.h>
|
||||
#include <sys/dmu_zfetch.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/zio_compress.h>
|
||||
#include <sys/sa.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/zfs_rlock.h>
|
||||
#include <sys/racct.h>
|
||||
#include <sys/vm.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zfs_vnops.h>
|
||||
|
||||
|
||||
/* Convert a page index to a byte offset, if the system does not. */
#ifndef IDX_TO_OFF
#define	IDX_TO_OFF(idx)	(((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

/*
 * Page allocation busy flags, selected by __FreeBSD_version.  The
 * two-flag form is parenthesised so it stays a single operand wherever
 * the macro is expanded.
 */
#if __FreeBSD_version < 1300051
#define	VM_ALLOC_BUSY_FLAGS	VM_ALLOC_NOBUSY
#else
#define	VM_ALLOC_BUSY_FLAGS	(VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
#endif

/*
 * Page lock wrappers: map to vm_page_lock()/vm_page_unlock() before
 * __FreeBSD_version 1300072 and expand to nothing from then on.
 */
#if __FreeBSD_version < 1300072
#define	dmu_page_lock(m)	vm_page_lock(m)
#define	dmu_page_unlock(m)	vm_page_unlock(m)
#else
#define	dmu_page_lock(m)
#define	dmu_page_unlock(m)
#endif
|
||||
|
||||
static int
|
||||
dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
|
||||
{
|
||||
dnode_t *dn;
|
||||
int err;
|
||||
|
||||
err = dnode_hold(os, object, FTAG, &dn);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
|
||||
numbufsp, dbpp, DMU_READ_PREFETCH);
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
vm_page_t *ma, dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_t **dbp;
|
||||
struct sf_buf *sf;
|
||||
int numbufs, i;
|
||||
int err;
|
||||
|
||||
if (size == 0)
|
||||
return (0);
|
||||
|
||||
err = dmu_buf_hold_array(os, object, offset, size,
|
||||
FALSE, FTAG, &numbufs, &dbp);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
for (i = 0; i < numbufs; i++) {
|
||||
int tocpy, copied, thiscpy;
|
||||
int bufoff;
|
||||
dmu_buf_t *db = dbp[i];
|
||||
caddr_t va;
|
||||
|
||||
ASSERT(size > 0);
|
||||
ASSERT3U(db->db_size, >=, PAGESIZE);
|
||||
|
||||
bufoff = offset - db->db_offset;
|
||||
tocpy = (int)MIN(db->db_size - bufoff, size);
|
||||
|
||||
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
|
||||
|
||||
if (tocpy == db->db_size)
|
||||
dmu_buf_will_fill(db, tx);
|
||||
else
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
|
||||
for (copied = 0; copied < tocpy; copied += PAGESIZE) {
|
||||
ASSERT3U(ptoa((*ma)->pindex), ==,
|
||||
db->db_offset + bufoff);
|
||||
thiscpy = MIN(PAGESIZE, tocpy - copied);
|
||||
va = zfs_map_page(*ma, &sf);
|
||||
bcopy(va, (char *)db->db_data + bufoff, thiscpy);
|
||||
zfs_unmap_page(sf);
|
||||
ma += 1;
|
||||
bufoff += PAGESIZE;
|
||||
}
|
||||
|
||||
if (tocpy == db->db_size)
|
||||
dmu_buf_fill_done(db, tx);
|
||||
|
||||
offset += tocpy;
|
||||
size -= tocpy;
|
||||
}
|
||||
dmu_buf_rele_array(dbp, numbufs, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
|
||||
int *rbehind, int *rahead, int last_size)
|
||||
{
|
||||
struct sf_buf *sf;
|
||||
vm_object_t vmobj;
|
||||
vm_page_t m;
|
||||
dmu_buf_t **dbp;
|
||||
dmu_buf_t *db;
|
||||
caddr_t va;
|
||||
int numbufs, i;
|
||||
int bufoff, pgoff, tocpy;
|
||||
int mi, di;
|
||||
int err;
|
||||
|
||||
ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
|
||||
ASSERT(last_size <= PAGE_SIZE);
|
||||
|
||||
err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
|
||||
IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
#ifdef DEBUG
|
||||
IMPLY(last_size < PAGE_SIZE, *rahead == 0);
|
||||
if (dbp[0]->db_offset != 0 || numbufs > 1) {
|
||||
for (i = 0; i < numbufs; i++) {
|
||||
ASSERT(ISP2(dbp[i]->db_size));
|
||||
ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0);
|
||||
ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
vmobj = ma[0]->object;
|
||||
zfs_vmobject_wlock(vmobj);
|
||||
|
||||
db = dbp[0];
|
||||
for (i = 0; i < *rbehind; i++) {
|
||||
m = vm_page_grab(vmobj, ma[0]->pindex - 1 - i,
|
||||
VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
|
||||
if (m == NULL)
|
||||
break;
|
||||
if (!vm_page_none_valid(m)) {
|
||||
ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
|
||||
vm_page_do_sunbusy(m);
|
||||
break;
|
||||
}
|
||||
ASSERT(m->dirty == 0);
|
||||
ASSERT(!pmap_page_is_mapped(m));
|
||||
|
||||
ASSERT(db->db_size > PAGE_SIZE);
|
||||
bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
|
||||
va = zfs_map_page(m, &sf);
|
||||
bcopy((char *)db->db_data + bufoff, va, PAGESIZE);
|
||||
zfs_unmap_page(sf);
|
||||
vm_page_valid(m);
|
||||
dmu_page_lock(m);
|
||||
if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
|
||||
vm_page_activate(m);
|
||||
else
|
||||
vm_page_deactivate(m);
|
||||
dmu_page_unlock(m);
|
||||
vm_page_do_sunbusy(m);
|
||||
}
|
||||
*rbehind = i;
|
||||
|
||||
bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
|
||||
pgoff = 0;
|
||||
for (mi = 0, di = 0; mi < count && di < numbufs; ) {
|
||||
if (pgoff == 0) {
|
||||
m = ma[mi];
|
||||
if (m != bogus_page) {
|
||||
vm_page_assert_xbusied(m);
|
||||
ASSERT(vm_page_none_valid(m));
|
||||
ASSERT(m->dirty == 0);
|
||||
ASSERT(!pmap_page_is_mapped(m));
|
||||
va = zfs_map_page(m, &sf);
|
||||
}
|
||||
}
|
||||
if (bufoff == 0)
|
||||
db = dbp[di];
|
||||
|
||||
if (m != bogus_page) {
|
||||
ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
|
||||
db->db_offset + bufoff);
|
||||
}
|
||||
|
||||
/*
|
||||
* We do not need to clamp the copy size by the file
|
||||
* size as the last block is zero-filled beyond the
|
||||
* end of file anyway.
|
||||
*/
|
||||
tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
|
||||
if (m != bogus_page)
|
||||
bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy);
|
||||
|
||||
pgoff += tocpy;
|
||||
ASSERT(pgoff <= PAGESIZE);
|
||||
if (pgoff == PAGESIZE) {
|
||||
if (m != bogus_page) {
|
||||
zfs_unmap_page(sf);
|
||||
vm_page_valid(m);
|
||||
}
|
||||
ASSERT(mi < count);
|
||||
mi++;
|
||||
pgoff = 0;
|
||||
}
|
||||
|
||||
bufoff += tocpy;
|
||||
ASSERT(bufoff <= db->db_size);
|
||||
if (bufoff == db->db_size) {
|
||||
ASSERT(di < numbufs);
|
||||
di++;
|
||||
bufoff = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
/*
|
||||
* Three possibilities:
|
||||
* - last requested page ends at a buffer boundary and , thus,
|
||||
* all pages and buffers have been iterated;
|
||||
* - all requested pages are filled, but the last buffer
|
||||
* has not been exhausted;
|
||||
* the read-ahead is possible only in this case;
|
||||
* - all buffers have been read, but the last page has not been
|
||||
* fully filled;
|
||||
* this is only possible if the file has only a single buffer
|
||||
* with a size that is not a multiple of the page size.
|
||||
*/
|
||||
if (mi == count) {
|
||||
ASSERT(di >= numbufs - 1);
|
||||
IMPLY(*rahead != 0, di == numbufs - 1);
|
||||
IMPLY(*rahead != 0, bufoff != 0);
|
||||
ASSERT(pgoff == 0);
|
||||
}
|
||||
if (di == numbufs) {
|
||||
ASSERT(mi >= count - 1);
|
||||
ASSERT(*rahead == 0);
|
||||
IMPLY(pgoff == 0, mi == count);
|
||||
if (pgoff != 0) {
|
||||
ASSERT(mi == count - 1);
|
||||
ASSERT((dbp[0]->db_size & PAGE_MASK) != 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (pgoff != 0) {
|
||||
ASSERT(m != bogus_page);
|
||||
bzero(va + pgoff, PAGESIZE - pgoff);
|
||||
zfs_unmap_page(sf);
|
||||
vm_page_valid(m);
|
||||
}
|
||||
|
||||
for (i = 0; i < *rahead; i++) {
|
||||
m = vm_page_grab(vmobj, ma[count - 1]->pindex + 1 + i,
|
||||
VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
|
||||
if (m == NULL)
|
||||
break;
|
||||
if (!vm_page_none_valid(m)) {
|
||||
ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
|
||||
vm_page_do_sunbusy(m);
|
||||
break;
|
||||
}
|
||||
ASSERT(m->dirty == 0);
|
||||
ASSERT(!pmap_page_is_mapped(m));
|
||||
|
||||
ASSERT(db->db_size > PAGE_SIZE);
|
||||
bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
|
||||
tocpy = MIN(db->db_size - bufoff, PAGESIZE);
|
||||
va = zfs_map_page(m, &sf);
|
||||
bcopy((char *)db->db_data + bufoff, va, tocpy);
|
||||
if (tocpy < PAGESIZE) {
|
||||
ASSERT(i == *rahead - 1);
|
||||
ASSERT((db->db_size & PAGE_MASK) != 0);
|
||||
bzero(va + tocpy, PAGESIZE - tocpy);
|
||||
}
|
||||
zfs_unmap_page(sf);
|
||||
vm_page_valid(m);
|
||||
dmu_page_lock(m);
|
||||
if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
|
||||
vm_page_activate(m);
|
||||
else
|
||||
vm_page_deactivate(m);
|
||||
dmu_page_unlock(m);
|
||||
vm_page_do_sunbusy(m);
|
||||
}
|
||||
*rahead = i;
|
||||
zfs_vmobject_wunlock(vmobj);
|
||||
|
||||
dmu_buf_rele_array(dbp, numbufs, FTAG);
|
||||
return (0);
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2017, Datto, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/hkdf.h>
|
||||
#include <sys/freebsd_crypto.h>
|
||||
#include <sys/hkdf.h>
|
||||
|
||||
static int
|
||||
hkdf_sha512_extract(uint8_t *salt, uint_t salt_len, uint8_t *key_material,
|
||||
uint_t km_len, uint8_t *out_buf)
|
||||
{
|
||||
crypto_key_t key;
|
||||
|
||||
/* initialize the salt as a crypto key */
|
||||
key.ck_format = CRYPTO_KEY_RAW;
|
||||
key.ck_length = CRYPTO_BYTES2BITS(salt_len);
|
||||
key.ck_data = salt;
|
||||
|
||||
crypto_mac(&key, key_material, km_len, out_buf, SHA512_DIGEST_LENGTH);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
hkdf_sha512_expand(uint8_t *extract_key, uint8_t *info, uint_t info_len,
|
||||
uint8_t *out_buf, uint_t out_len)
|
||||
{
|
||||
struct hmac_ctx ctx;
|
||||
crypto_key_t key;
|
||||
uint_t i, T_len = 0, pos = 0;
|
||||
uint8_t c;
|
||||
uint_t N = (out_len + SHA512_DIGEST_LENGTH) / SHA512_DIGEST_LENGTH;
|
||||
uint8_t T[SHA512_DIGEST_LENGTH];
|
||||
|
||||
if (N > 255)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
/* initialize the salt as a crypto key */
|
||||
key.ck_format = CRYPTO_KEY_RAW;
|
||||
key.ck_length = CRYPTO_BYTES2BITS(SHA512_DIGEST_LENGTH);
|
||||
key.ck_data = extract_key;
|
||||
|
||||
for (i = 1; i <= N; i++) {
|
||||
c = i;
|
||||
|
||||
crypto_mac_init(&ctx, &key);
|
||||
crypto_mac_update(&ctx, T, T_len);
|
||||
crypto_mac_update(&ctx, info, info_len);
|
||||
crypto_mac_update(&ctx, &c, 1);
|
||||
crypto_mac_final(&ctx, T, SHA512_DIGEST_LENGTH);
|
||||
bcopy(T, out_buf + pos,
|
||||
(i != N) ? SHA512_DIGEST_LENGTH : (out_len - pos));
|
||||
pos += SHA512_DIGEST_LENGTH;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* HKDF is designed to be a relatively fast function for deriving keys from a
|
||||
* master key + a salt. We use this function to generate new encryption keys
|
||||
* so as to avoid hitting the cryptographic limits of the underlying
|
||||
* encryption modes. Note that, for the sake of deriving encryption keys, the
|
||||
* info parameter is called the "salt" everywhere else in the code.
|
||||
*/
|
||||
int
|
||||
hkdf_sha512(uint8_t *key_material, uint_t km_len, uint8_t *salt,
|
||||
uint_t salt_len, uint8_t *info, uint_t info_len, uint8_t *output_key,
|
||||
uint_t out_len)
|
||||
{
|
||||
int ret;
|
||||
uint8_t extract_key[SHA512_DIGEST_LENGTH];
|
||||
|
||||
ret = hkdf_sha512_extract(salt, salt_len, key_material, km_len,
|
||||
extract_key);
|
||||
if (ret != 0)
|
||||
return (ret);
|
||||
|
||||
ret = hkdf_sha512_expand(extract_key, info, info_len, output_key,
|
||||
out_len);
|
||||
if (ret != 0)
|
||||
return (ret);
|
||||
|
||||
return (0);
|
||||
}
|
||||
@@ -0,0 +1,404 @@
|
||||
/*
|
||||
* Copyright (c) 2020 iXsystems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/eventhandler.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/eventhandler.h>
|
||||
#include <sys/cmn_err.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_dir.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/dsl_prop.h>
|
||||
#include <sys/dsl_deleg.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_impl.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/fm/util.h>
|
||||
#include <sys/sunddi.h>
|
||||
#include <sys/policy.h>
|
||||
#include <sys/zone.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/taskqueue.h>
|
||||
#include <sys/sdt.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zfs_ctldir.h>
|
||||
#include <sys/zfs_dir.h>
|
||||
#include <sys/zfs_onexit.h>
|
||||
#include <sys/zvol.h>
|
||||
#include <sys/dsl_scan.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_send.h>
|
||||
#include <sys/dsl_destroy.h>
|
||||
#include <sys/dsl_bookmark.h>
|
||||
#include <sys/dsl_userhold.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/zcp.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/vdev_removal.h>
|
||||
#include <sys/dsl_crypt.h>
|
||||
|
||||
#include <sys/zfs_ioctl_compat.h>
|
||||
#include <sys/zfs_ioctl_impl.h>
|
||||
|
||||
#include "zfs_namecheck.h"
|
||||
#include "zfs_prop.h"
|
||||
#include "zfs_deleg.h"
|
||||
#include "zfs_comutil.h"
|
||||
|
||||
SYSCTL_DECL(_vfs_zfs);
|
||||
SYSCTL_DECL(_vfs_zfs_vdev);
|
||||
|
||||
|
||||
static int zfs_version_ioctl = ZFS_IOCVER_ZOF;
|
||||
SYSCTL_DECL(_vfs_zfs_version);
|
||||
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, ioctl, CTLFLAG_RD, &zfs_version_ioctl,
|
||||
0, "ZFS_IOCTL_VERSION");
|
||||
|
||||
static struct cdev *zfsdev;
|
||||
|
||||
extern void zfs_init(void);
|
||||
extern void zfs_fini(void);
|
||||
extern void zfs_ioctl_init(void);
|
||||
|
||||
|
||||
static struct root_hold_token *zfs_root_token;
|
||||
|
||||
extern uint_t rrw_tsd_key;
|
||||
extern uint_t zfs_allow_log_key;
|
||||
extern uint_t zfs_geom_probe_vdev_key;
|
||||
|
||||
static int zfs__init(void);
|
||||
static int zfs__fini(void);
|
||||
static void zfs_shutdown(void *, int);
|
||||
|
||||
static eventhandler_tag zfs_shutdown_event_tag;
|
||||
extern zfsdev_state_t *zfsdev_state_list;
|
||||
|
||||
#define ZFS_MIN_KSTACK_PAGES 4
|
||||
|
||||
static void
|
||||
zfs_cmd_bsd12_to_zof(zfs_cmd_legacy_t *src, zfs_cmd_t *dst)
|
||||
{
|
||||
memcpy(dst, src, offsetof(zfs_cmd_t, zc_objset_stats));
|
||||
*&dst->zc_objset_stats = *&src->zc_objset_stats;
|
||||
memcpy(&dst->zc_begin_record, &src->zc_begin_record,
|
||||
offsetof(zfs_cmd_t, zc_sendobj) -
|
||||
offsetof(zfs_cmd_t, zc_begin_record));
|
||||
memcpy(&dst->zc_sendobj, &src->zc_sendobj,
|
||||
sizeof (zfs_cmd_t) - 8 - offsetof(zfs_cmd_t, zc_sendobj));
|
||||
dst->zc_zoneid = src->zc_jailid;
|
||||
}
|
||||
|
||||
static void
|
||||
zfs_cmd_zof_to_bsd12(zfs_cmd_t *src, zfs_cmd_legacy_t *dst)
|
||||
{
|
||||
memcpy(dst, src, offsetof(zfs_cmd_t, zc_objset_stats));
|
||||
*&dst->zc_objset_stats = *&src->zc_objset_stats;
|
||||
memcpy(&dst->zc_begin_record, &src->zc_begin_record,
|
||||
offsetof(zfs_cmd_t, zc_sendobj) -
|
||||
offsetof(zfs_cmd_t, zc_begin_record));
|
||||
memcpy(&dst->zc_sendobj, &src->zc_sendobj,
|
||||
sizeof (zfs_cmd_t) - 8 - offsetof(zfs_cmd_t, zc_sendobj));
|
||||
dst->zc_jailid = src->zc_zoneid;
|
||||
}
|
||||
|
||||
static int
|
||||
zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
|
||||
struct thread *td)
|
||||
{
|
||||
uint_t len, vecnum;
|
||||
zfs_iocparm_t *zp;
|
||||
zfs_cmd_t *zc;
|
||||
zfs_cmd_legacy_t *zcl;
|
||||
int rc, error;
|
||||
void *uaddr;
|
||||
|
||||
len = IOCPARM_LEN(zcmd);
|
||||
vecnum = zcmd & 0xff;
|
||||
zp = (void *)arg;
|
||||
uaddr = (void *)zp->zfs_cmd;
|
||||
error = 0;
|
||||
zcl = NULL;
|
||||
|
||||
if (len != sizeof (zfs_iocparm_t)) {
|
||||
printf("len %d vecnum: %d sizeof (zfs_cmd_t) %lu\n",
|
||||
len, vecnum, sizeof (zfs_cmd_t));
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
|
||||
/*
|
||||
* Remap ioctl code for legacy user binaries
|
||||
*/
|
||||
if (zp->zfs_ioctl_version == ZFS_IOCVER_FREEBSD) {
|
||||
if (vecnum >= sizeof (zfs_ioctl_bsd12_to_zof)/sizeof (long)) {
|
||||
kmem_free(zc, sizeof (zfs_cmd_t));
|
||||
return (ENOTSUP);
|
||||
}
|
||||
zcl = kmem_zalloc(sizeof (zfs_cmd_legacy_t), KM_SLEEP);
|
||||
vecnum = zfs_ioctl_bsd12_to_zof[vecnum];
|
||||
if (copyin(uaddr, zcl, sizeof (zfs_cmd_legacy_t))) {
|
||||
error = SET_ERROR(EFAULT);
|
||||
goto out;
|
||||
}
|
||||
zfs_cmd_bsd12_to_zof(zcl, zc);
|
||||
} else if (copyin(uaddr, zc, sizeof (zfs_cmd_t))) {
|
||||
error = SET_ERROR(EFAULT);
|
||||
goto out;
|
||||
}
|
||||
error = zfsdev_ioctl_common(vecnum, zc);
|
||||
if (zcl) {
|
||||
zfs_cmd_zof_to_bsd12(zc, zcl);
|
||||
rc = copyout(zcl, uaddr, sizeof (*zcl));
|
||||
} else {
|
||||
rc = copyout(zc, uaddr, sizeof (*zc));
|
||||
}
|
||||
if (error == 0 && rc != 0)
|
||||
error = SET_ERROR(EFAULT);
|
||||
out:
|
||||
if (zcl)
|
||||
kmem_free(zcl, sizeof (zfs_cmd_legacy_t));
|
||||
kmem_free(zc, sizeof (zfs_cmd_t));
|
||||
return (error);
|
||||
}
|
||||
|
||||
static void
|
||||
zfsdev_close(void *data)
|
||||
{
|
||||
zfsdev_state_t *zs, *zsp = data;
|
||||
|
||||
mutex_enter(&zfsdev_state_lock);
|
||||
for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
|
||||
if (zs == zsp)
|
||||
break;
|
||||
}
|
||||
if (zs == NULL || zs->zs_minor <= 0) {
|
||||
mutex_exit(&zfsdev_state_lock);
|
||||
return;
|
||||
}
|
||||
zs->zs_minor = -1;
|
||||
zfs_onexit_destroy(zs->zs_onexit);
|
||||
zfs_zevent_destroy(zs->zs_zevent);
|
||||
mutex_exit(&zfsdev_state_lock);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ctldev_init(struct cdev *devp)
|
||||
{
|
||||
boolean_t newzs = B_FALSE;
|
||||
minor_t minor;
|
||||
zfsdev_state_t *zs, *zsprev = NULL;
|
||||
|
||||
ASSERT(MUTEX_HELD(&zfsdev_state_lock));
|
||||
|
||||
minor = zfsdev_minor_alloc();
|
||||
if (minor == 0)
|
||||
return (SET_ERROR(ENXIO));
|
||||
|
||||
for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
|
||||
if (zs->zs_minor == -1)
|
||||
break;
|
||||
zsprev = zs;
|
||||
}
|
||||
|
||||
if (!zs) {
|
||||
zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
|
||||
newzs = B_TRUE;
|
||||
}
|
||||
|
||||
devfs_set_cdevpriv(zs, zfsdev_close);
|
||||
|
||||
zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
|
||||
zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
|
||||
|
||||
if (newzs) {
|
||||
zs->zs_minor = minor;
|
||||
wmb();
|
||||
zsprev->zs_next = zs;
|
||||
} else {
|
||||
wmb();
|
||||
zs->zs_minor = minor;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td)
|
||||
{
|
||||
int error;
|
||||
|
||||
mutex_enter(&zfsdev_state_lock);
|
||||
error = zfs_ctldev_init(devp);
|
||||
mutex_exit(&zfsdev_state_lock);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static struct cdevsw zfs_cdevsw = {
|
||||
.d_version = D_VERSION,
|
||||
.d_open = zfsdev_open,
|
||||
.d_ioctl = zfsdev_ioctl,
|
||||
.d_name = ZFS_DRIVER
|
||||
};
|
||||
|
||||
int
|
||||
zfsdev_attach(void)
|
||||
{
|
||||
zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
|
||||
ZFS_DRIVER);
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
zfsdev_detach(void)
|
||||
{
|
||||
if (zfsdev != NULL)
|
||||
destroy_dev(zfsdev);
|
||||
}
|
||||
|
||||
int
|
||||
zfs__init(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
#if KSTACK_PAGES < ZFS_MIN_KSTACK_PAGES
|
||||
printf("ZFS NOTICE: KSTACK_PAGES is %d which could result in stack "
|
||||
"overflow panic!\nPlease consider adding "
|
||||
"'options KSTACK_PAGES=%d' to your kernel config\n", KSTACK_PAGES,
|
||||
ZFS_MIN_KSTACK_PAGES);
|
||||
#endif
|
||||
zfs_root_token = root_mount_hold("ZFS");
|
||||
if ((error = zfs_kmod_init()) != 0) {
|
||||
printf("ZFS: Failed to Load ZFS Filesystem"
|
||||
", rc = %d\n", error);
|
||||
root_mount_rel(zfs_root_token);
|
||||
return (error);
|
||||
}
|
||||
|
||||
|
||||
tsd_create(&zfs_geom_probe_vdev_key, NULL);
|
||||
|
||||
printf("ZFS storage pool version: features support ("
|
||||
SPA_VERSION_STRING ")\n");
|
||||
root_mount_rel(zfs_root_token);
|
||||
ddi_sysevent_init();
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zfs__fini(void)
|
||||
{
|
||||
if (zfs_busy() || zvol_busy() ||
|
||||
zio_injection_enabled) {
|
||||
return (EBUSY);
|
||||
}
|
||||
zfs_kmod_fini();
|
||||
tsd_destroy(&zfs_geom_probe_vdev_key);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
zfs_shutdown(void *arg __unused, int howto __unused)
|
||||
{
|
||||
|
||||
/*
|
||||
* ZFS fini routines can not properly work in a panic-ed system.
|
||||
*/
|
||||
if (panicstr == NULL)
|
||||
zfs__fini();
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
zfs_modevent(module_t mod, int type, void *unused __unused)
|
||||
{
|
||||
int err;
|
||||
|
||||
switch (type) {
|
||||
case MOD_LOAD:
|
||||
err = zfs__init();
|
||||
if (err == 0)
|
||||
zfs_shutdown_event_tag = EVENTHANDLER_REGISTER(
|
||||
shutdown_post_sync, zfs_shutdown, NULL,
|
||||
SHUTDOWN_PRI_FIRST);
|
||||
return (err);
|
||||
case MOD_UNLOAD:
|
||||
err = zfs__fini();
|
||||
if (err == 0 && zfs_shutdown_event_tag != NULL)
|
||||
EVENTHANDLER_DEREGISTER(shutdown_post_sync,
|
||||
zfs_shutdown_event_tag);
|
||||
return (err);
|
||||
case MOD_SHUTDOWN:
|
||||
return (0);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return (EOPNOTSUPP);
|
||||
}
|
||||
|
||||
static moduledata_t zfs_mod = {
|
||||
"zfsctrl",
|
||||
zfs_modevent,
|
||||
0
|
||||
};
|
||||
|
||||
#ifdef _KERNEL
|
||||
EVENTHANDLER_DEFINE(mountroot, spa_boot_init, NULL, 0);
|
||||
#endif
|
||||
|
||||
DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_CLOCKS, SI_ORDER_ANY);
|
||||
MODULE_VERSION(zfsctrl, 1);
|
||||
MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
|
||||
MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
|
||||
MODULE_DEPEND(zfsctrl, crypto, 1, 1, 1);
|
||||
MODULE_DEPEND(zfsctrl, cryptodev, 1, 1, 1);
|
||||
@@ -0,0 +1,280 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
|
||||
*/
|
||||
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/fm/fs/zfs.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zil.h>
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/vdev_removal.h>
|
||||
#include <sys/vdev_indirect_mapping.h>
|
||||
#include <sys/vdev_indirect_births.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/metaslab_impl.h>
|
||||
#include <sys/uberblock_impl.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/bpobj.h>
|
||||
#include <sys/dmu_traverse.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/unique.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/dsl_dir.h>
|
||||
#include <sys/dsl_prop.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/callb.h>
|
||||
#include <sys/spa_boot.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/dsl_scan.h>
|
||||
#include <sys/dmu_send.h>
|
||||
#include <sys/dsl_destroy.h>
|
||||
#include <sys/dsl_userhold.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/zvol.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/callb.h>
|
||||
#include <sys/zone.h>
|
||||
|
||||
#include "zfs_prop.h"
|
||||
#include "zfs_comutil.h"
|
||||
|
||||
extern int vdev_geom_read_pool_label(const char *name, nvlist_t ***configs,
|
||||
uint64_t *count);
|
||||
|
||||
static nvlist_t *
|
||||
spa_generate_rootconf(const char *name)
|
||||
{
|
||||
nvlist_t **configs, **tops;
|
||||
nvlist_t *config;
|
||||
nvlist_t *best_cfg, *nvtop, *nvroot;
|
||||
uint64_t *holes;
|
||||
uint64_t best_txg;
|
||||
uint64_t nchildren;
|
||||
uint64_t pgid;
|
||||
uint64_t count;
|
||||
uint64_t i;
|
||||
uint_t nholes;
|
||||
|
||||
if (vdev_geom_read_pool_label(name, &configs, &count) != 0)
|
||||
return (NULL);
|
||||
|
||||
ASSERT3U(count, !=, 0);
|
||||
best_txg = 0;
|
||||
for (i = 0; i < count; i++) {
|
||||
uint64_t txg;
|
||||
|
||||
VERIFY(nvlist_lookup_uint64(configs[i], ZPOOL_CONFIG_POOL_TXG,
|
||||
&txg) == 0);
|
||||
if (txg > best_txg) {
|
||||
best_txg = txg;
|
||||
best_cfg = configs[i];
|
||||
}
|
||||
}
|
||||
|
||||
nchildren = 1;
|
||||
nvlist_lookup_uint64(best_cfg, ZPOOL_CONFIG_VDEV_CHILDREN, &nchildren);
|
||||
holes = NULL;
|
||||
nvlist_lookup_uint64_array(best_cfg, ZPOOL_CONFIG_HOLE_ARRAY,
|
||||
&holes, &nholes);
|
||||
|
||||
tops = kmem_zalloc(nchildren * sizeof (void *), KM_SLEEP);
|
||||
for (i = 0; i < nchildren; i++) {
|
||||
if (i >= count)
|
||||
break;
|
||||
if (configs[i] == NULL)
|
||||
continue;
|
||||
VERIFY(nvlist_lookup_nvlist(configs[i], ZPOOL_CONFIG_VDEV_TREE,
|
||||
&nvtop) == 0);
|
||||
nvlist_dup(nvtop, &tops[i], KM_SLEEP);
|
||||
}
|
||||
for (i = 0; holes != NULL && i < nholes; i++) {
|
||||
if (i >= nchildren)
|
||||
continue;
|
||||
if (tops[holes[i]] != NULL)
|
||||
continue;
|
||||
nvlist_alloc(&tops[holes[i]], NV_UNIQUE_NAME, KM_SLEEP);
|
||||
VERIFY(nvlist_add_string(tops[holes[i]], ZPOOL_CONFIG_TYPE,
|
||||
VDEV_TYPE_HOLE) == 0);
|
||||
VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_ID,
|
||||
holes[i]) == 0);
|
||||
VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_GUID,
|
||||
0) == 0);
|
||||
}
|
||||
for (i = 0; i < nchildren; i++) {
|
||||
if (tops[i] != NULL)
|
||||
continue;
|
||||
nvlist_alloc(&tops[i], NV_UNIQUE_NAME, KM_SLEEP);
|
||||
VERIFY(nvlist_add_string(tops[i], ZPOOL_CONFIG_TYPE,
|
||||
VDEV_TYPE_MISSING) == 0);
|
||||
VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_ID,
|
||||
i) == 0);
|
||||
VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_GUID,
|
||||
0) == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create pool config based on the best vdev config.
|
||||
*/
|
||||
nvlist_dup(best_cfg, &config, KM_SLEEP);
|
||||
|
||||
/*
|
||||
* Put this pool's top-level vdevs into a root vdev.
|
||||
*/
|
||||
VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
|
||||
&pgid) == 0);
|
||||
VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
|
||||
VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
|
||||
VDEV_TYPE_ROOT) == 0);
|
||||
VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
|
||||
VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0);
|
||||
VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
||||
tops, nchildren) == 0);
|
||||
|
||||
/*
|
||||
* Replace the existing vdev_tree with the new root vdev in
|
||||
* this pool's configuration (remove the old, add the new).
|
||||
*/
|
||||
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
|
||||
|
||||
/*
|
||||
* Drop vdev config elements that should not be present at pool level.
|
||||
*/
|
||||
nvlist_remove(config, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64);
|
||||
nvlist_remove(config, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64);
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
nvlist_free(configs[i]);
|
||||
kmem_free(configs, count * sizeof (void *));
|
||||
for (i = 0; i < nchildren; i++)
|
||||
nvlist_free(tops[i]);
|
||||
kmem_free(tops, nchildren * sizeof (void *));
|
||||
nvlist_free(nvroot);
|
||||
return (config);
|
||||
}
|
||||
|
||||
int
|
||||
spa_import_rootpool(const char *name)
|
||||
{
|
||||
spa_t *spa;
|
||||
vdev_t *rvd;
|
||||
nvlist_t *config, *nvtop;
|
||||
uint64_t txg;
|
||||
char *pname;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Read the label from the boot device and generate a configuration.
|
||||
*/
|
||||
config = spa_generate_rootconf(name);
|
||||
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
if (config != NULL) {
|
||||
VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
|
||||
&pname) == 0 && strcmp(name, pname) == 0);
|
||||
VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg)
|
||||
== 0);
|
||||
|
||||
if ((spa = spa_lookup(pname)) != NULL) {
|
||||
/*
|
||||
* The pool could already be imported,
|
||||
* e.g., after reboot -r.
|
||||
*/
|
||||
if (spa->spa_state == POOL_STATE_ACTIVE) {
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
nvlist_free(config);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove the existing root pool from the namespace so
|
||||
* that we can replace it with the correct config
|
||||
* we just read in.
|
||||
*/
|
||||
spa_remove(spa);
|
||||
}
|
||||
spa = spa_add(pname, config, NULL);
|
||||
|
||||
/*
|
||||
* Set spa_ubsync.ub_version as it can be used in vdev_alloc()
|
||||
* via spa_version().
|
||||
*/
|
||||
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
|
||||
&spa->spa_ubsync.ub_version) != 0)
|
||||
spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL;
|
||||
} else if ((spa = spa_lookup(name)) == NULL) {
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
nvlist_free(config);
|
||||
cmn_err(CE_NOTE, "Cannot find the pool label for '%s'",
|
||||
name);
|
||||
return (EIO);
|
||||
} else {
|
||||
VERIFY(nvlist_dup(spa->spa_config, &config, KM_SLEEP) == 0);
|
||||
}
|
||||
spa->spa_is_root = B_TRUE;
|
||||
spa->spa_import_flags = ZFS_IMPORT_VERBATIM;
|
||||
|
||||
/*
|
||||
* Build up a vdev tree based on the boot device's label config.
|
||||
*/
|
||||
VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
&nvtop) == 0);
|
||||
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
|
||||
error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
|
||||
VDEV_ALLOC_ROOTPOOL);
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
if (error) {
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
nvlist_free(config);
|
||||
cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
|
||||
pname);
|
||||
return (error);
|
||||
}
|
||||
|
||||
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
|
||||
vdev_free(rvd);
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
|
||||
nvlist_free(config);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Return the fixed zone identifier string "freebsd".
 */
const char *
spa_history_zone(void)
{
	static const char zone_name[] = "freebsd";

	return (zone_name);
}
|
||||
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2020 iXsystems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/spa.h>
|
||||
#include <zfs_comutil.h>
|
||||
|
||||
void
|
||||
spa_stats_init(spa_t *spa)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
spa_stats_destroy(spa_t *spa)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
spa_iostats_trim_add(spa_t *spa, trim_type_t type,
|
||||
uint64_t extents_written, uint64_t bytes_written,
|
||||
uint64_t extents_skipped, uint64_t bytes_skipped,
|
||||
uint64_t extents_failed, uint64_t bytes_failed)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
|
||||
{
|
||||
|
||||
}
|
||||
/*
|
||||
* Set txg state completion time and increment current state.
|
||||
*/
|
||||
int
|
||||
spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
|
||||
hrtime_t completed_time)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
|
||||
txg_stat_t *
|
||||
spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp)
|
||||
{
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void
|
||||
spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
|
||||
uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id,
|
||||
int error)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
int
|
||||
spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
|
||||
hrtime_t duration)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
@@ -0,0 +1,699 @@
|
||||
/*
|
||||
* Copyright (c) 2020 iXsystems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/cmn_err.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_dir.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/dsl_prop.h>
|
||||
#include <sys/dsl_deleg.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_impl.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/sunddi.h>
|
||||
#include <sys/policy.h>
|
||||
#include <sys/zone.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/taskqueue.h>
|
||||
#include <sys/sdt.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zfs_ctldir.h>
|
||||
#include <sys/zfs_dir.h>
|
||||
#include <sys/zfs_onexit.h>
|
||||
#include <sys/zvol.h>
|
||||
#include <sys/dsl_scan.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_send.h>
|
||||
#include <sys/dsl_destroy.h>
|
||||
#include <sys/dsl_bookmark.h>
|
||||
#include <sys/dsl_userhold.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/zcp.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/vdev_removal.h>
|
||||
#include <sys/dsl_crypt.h>
|
||||
|
||||
#include <sys/zfs_ioctl_compat.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#include <sys/arc_impl.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
|
||||
#include <../zfs_config.h>
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
SYSCTL_DECL(_vfs_zfs);
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, zevent, CTLFLAG_RW, 0, "ZFS events");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, zil, CTLFLAG_RW, 0, "ZFS ZIL");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RW, 0, "ZFS TRIM");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, spa, CTLFLAG_RW, 0, "space allocation");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, reconstruct, CTLFLAG_RW, 0, "reconstruct");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, prefetch, CTLFLAG_RW, 0, "ZFS ZFETCH");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, multihost, CTLFLAG_RW, 0, "multihost protection");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, mg, CTLFLAG_RW, 0, "metaslab group");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, metaslab, CTLFLAG_RW, 0, "ZFS metaslab");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, lua, CTLFLAG_RW, 0, "lua");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, l2arc, CTLFLAG_RW, 0, "l2arc");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf, CTLFLAG_RW, 0, "ZFS disk buf cache");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf_cache, CTLFLAG_RW, 0, "ZFS disk buf cache");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, deadman, CTLFLAG_RW, 0, "ZFS deadman");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, condense, CTLFLAG_RW, 0, "ZFS condense");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, arc, CTLFLAG_RW, 0, "ZFS Adaptive Replacement Cache");
|
||||
|
||||
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0,
|
||||
"ZFS VDEV Mirror");
|
||||
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, cache, CTLFLAG_RW, 0, "ZFS VDEV Cache");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, livelist, CTLFLAG_RW, 0, "livelist state");
|
||||
SYSCTL_NODE(_vfs_zfs_livelist, OID_AUTO, condense, CTLFLAG_RW, 0, "condense knobs");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, recv, CTLFLAG_RW, 0, "receive knobs");
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, send, CTLFLAG_RW, 0, "send knobs");
|
||||
|
||||
SYSCTL_DECL(_vfs_zfs_version);
|
||||
SYSCTL_CONST_STRING(_vfs_zfs_version, OID_AUTO, module, CTLFLAG_RD,
|
||||
(ZFS_META_VERSION "-" ZFS_META_RELEASE), "OpenZFS module version");
|
||||
|
||||
extern arc_state_t ARC_anon;
|
||||
extern arc_state_t ARC_mru;
|
||||
extern arc_state_t ARC_mru_ghost;
|
||||
extern arc_state_t ARC_mfu;
|
||||
extern arc_state_t ARC_mfu_ghost;
|
||||
extern arc_state_t ARC_l2c_only;
|
||||
|
||||
/*
|
||||
* minimum lifespan of a prefetch block in clock ticks
|
||||
* (initialized in arc_init())
|
||||
*/
|
||||
|
||||
/* arc.c */
|
||||
|
||||
/* legacy compat */
|
||||
extern unsigned long l2arc_write_max; /* def max write size */
|
||||
extern unsigned long l2arc_write_boost; /* extra warmup write */
|
||||
extern unsigned long l2arc_headroom; /* # of dev writes */
|
||||
extern unsigned long l2arc_headroom_boost;
|
||||
extern unsigned long l2arc_feed_secs; /* interval seconds */
|
||||
extern unsigned long l2arc_feed_min_ms; /* min interval msecs */
|
||||
extern int l2arc_noprefetch; /* don't cache prefetch bufs */
|
||||
extern int l2arc_feed_again; /* turbo warmup */
|
||||
extern int l2arc_norw; /* no reads during writes */
|
||||
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, CTLFLAG_RW,
|
||||
&l2arc_write_max, 0, "max write size (LEGACY)");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, CTLFLAG_RW,
|
||||
&l2arc_write_boost, 0, "extra write during warmup (LEGACY)");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, CTLFLAG_RW,
|
||||
&l2arc_headroom, 0, "number of dev writes (LEGACY)");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, CTLFLAG_RW,
|
||||
&l2arc_feed_secs, 0, "interval seconds (LEGACY)");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, CTLFLAG_RW,
|
||||
&l2arc_feed_min_ms, 0, "min interval milliseconds (LEGACY)");
|
||||
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, CTLFLAG_RW,
|
||||
&l2arc_noprefetch, 0, "don't cache prefetch bufs (LEGACY)");
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, CTLFLAG_RW,
|
||||
&l2arc_feed_again, 0, "turbo warmup (LEGACY)");
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RW,
|
||||
&l2arc_norw, 0, "no reads during writes (LEGACY)");
|
||||
#if 0
|
||||
extern int zfs_compressed_arc_enabled;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, compressed_arc_enabled, CTLFLAG_RW,
|
||||
&zfs_compressed_arc_enabled, 1, "compressed arc buffers (LEGACY)");
|
||||
#endif
|
||||
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD,
|
||||
&ARC_anon.arcs_size.rc_count, 0, "size of anonymous state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
|
||||
&ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
|
||||
"size of anonymous state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
|
||||
&ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
|
||||
"size of anonymous state");
|
||||
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD,
|
||||
&ARC_mru.arcs_size.rc_count, 0, "size of mru state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
|
||||
&ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
|
||||
"size of metadata in mru state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
|
||||
&ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
|
||||
"size of data in mru state");
|
||||
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD,
|
||||
&ARC_mru_ghost.arcs_size.rc_count, 0, "size of mru ghost state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
|
||||
&ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
|
||||
"size of metadata in mru ghost state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
|
||||
&ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
|
||||
"size of data in mru ghost state");
|
||||
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD,
|
||||
&ARC_mfu.arcs_size.rc_count, 0, "size of mfu state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
|
||||
&ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
|
||||
"size of metadata in mfu state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
|
||||
&ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
|
||||
"size of data in mfu state");
|
||||
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD,
|
||||
&ARC_mfu_ghost.arcs_size.rc_count, 0, "size of mfu ghost state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
|
||||
&ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
|
||||
"size of metadata in mfu ghost state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
|
||||
&ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
|
||||
"size of data in mfu ghost state");
|
||||
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,
|
||||
&ARC_l2c_only.arcs_size.rc_count, 0, "size of mru state");
|
||||
|
||||
extern int arc_no_grow_shift;
|
||||
extern int arc_shrink_shift;
|
||||
|
||||
extern arc_stats_t arc_stats;
|
||||
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
|
||||
#define arc_p ARCSTAT(arcstat_p) /* target size of MRU */
|
||||
#define arc_c ARCSTAT(arcstat_c) /* target size of cache */
|
||||
#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */
|
||||
#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */
|
||||
#define arc_no_grow ARCSTAT(arcstat_no_grow) /* do not grow cache size */
|
||||
#define arc_tempreserve ARCSTAT(arcstat_tempreserve)
|
||||
#define arc_loaned_bytes ARCSTAT(arcstat_loaned_bytes)
|
||||
#define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */
|
||||
#define arc_dnode_limit ARCSTAT(arcstat_dnode_limit) /* max size for dnodes */
|
||||
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
|
||||
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
|
||||
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */
|
||||
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */
|
||||
|
||||
static int
|
||||
sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
uint32_t val;
|
||||
int err;
|
||||
|
||||
val = arc_no_grow_shift;
|
||||
err = sysctl_handle_32(oidp, &val, 0, req);
|
||||
if (err != 0 || req->newptr == NULL)
|
||||
return (err);
|
||||
|
||||
if (val >= arc_shrink_shift)
|
||||
return (EINVAL);
|
||||
|
||||
arc_no_grow_shift = val;
|
||||
return (0);
|
||||
}
|
||||
|
||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift, CTLTYPE_U32 | CTLFLAG_RWTUN,
|
||||
0, sizeof (uint32_t), sysctl_vfs_zfs_arc_no_grow_shift, "U",
|
||||
"log2(fraction of ARC which must be free to allow growing)");
|
||||
/* dbuf.c */
|
||||
|
||||
|
||||
/* dmu.c */
|
||||
|
||||
/* dmu_zfetch.c */
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH");
|
||||
|
||||
/* max bytes to prefetch per stream (default 8MB) */
|
||||
extern uint32_t zfetch_max_distance;
|
||||
SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance, CTLFLAG_RWTUN,
|
||||
&zfetch_max_distance, 0, "Max bytes to prefetch per stream (LEGACY)");
|
||||
|
||||
/* max bytes to prefetch indirects for per stream (default 64MB) */
|
||||
extern uint32_t zfetch_max_idistance;
|
||||
SYSCTL_UINT(_vfs_zfs_prefetch, OID_AUTO, max_idistance, CTLFLAG_RWTUN,
|
||||
&zfetch_max_idistance, 0, "Max bytes to prefetch indirects for per stream");
|
||||
|
||||
/* dsl_pool.c */
|
||||
|
||||
/* dnode.c */
|
||||
extern int zfs_default_bs;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, default_bs, CTLFLAG_RWTUN,
|
||||
&zfs_default_bs, 0, "Default dnode block shift");
|
||||
|
||||
extern int zfs_default_ibs;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, default_ibs, CTLFLAG_RWTUN,
|
||||
&zfs_default_ibs, 0, "Default dnode indirect block shift");
|
||||
|
||||
|
||||
/* dsl_scan.c */
|
||||
|
||||
/* metaslab.c */
|
||||
|
||||
/*
|
||||
* Enable/disable lba weighting (i.e. outer tracks are given preference).
|
||||
*/
|
||||
extern boolean_t metaslab_lba_weighting_enabled;
|
||||
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, lba_weighting, CTLFLAG_RWTUN,
|
||||
&metaslab_lba_weighting_enabled, 0,
|
||||
"Enable LBA weighting (i.e. outer tracks are given preference)");
|
||||
|
||||
|
||||
/*
|
||||
* In pools where the log space map feature is not enabled we touch
|
||||
* multiple metaslabs (and their respective space maps) with each
|
||||
* transaction group. Thus, we benefit from having a small space map
|
||||
* block size since it allows us to issue more I/O operations scattered
|
||||
* around the disk. So a sane default for the space map block size
|
||||
* is 8~16K.
|
||||
*/
|
||||
extern int zfs_metaslab_sm_blksz_no_log;
|
||||
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_no_log, CTLFLAG_RDTUN,
|
||||
&zfs_metaslab_sm_blksz_no_log, 0,
|
||||
"Block size for space map in pools with log space map disabled. "
|
||||
"Power of 2 and greater than 4096.");
|
||||
|
||||
/*
|
||||
* When the log space map feature is enabled, we accumulate a lot of
|
||||
* changes per metaslab that are flushed once in a while so we benefit
|
||||
* from a bigger block size like 128K for the metaslab space maps.
|
||||
*/
|
||||
extern int zfs_metaslab_sm_blksz_with_log;
|
||||
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log, CTLFLAG_RDTUN,
|
||||
&zfs_metaslab_sm_blksz_with_log, 0,
|
||||
"Block size for space map in pools with log space map enabled. "
|
||||
"Power of 2 and greater than 4096.");
|
||||
|
||||
/*
|
||||
* The in-core space map representation is more compact than its on-disk form.
|
||||
* The zfs_condense_pct determines how much more compact the in-core
|
||||
* space map representation must be before we compact it on-disk.
|
||||
* Values should be greater than or equal to 100.
|
||||
*/
|
||||
extern int zfs_condense_pct;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, condense_pct, CTLFLAG_RWTUN,
|
||||
&zfs_condense_pct, 0,
|
||||
"Condense on-disk spacemap when it is more than this many percents"
|
||||
" of in-memory counterpart");
|
||||
|
||||
extern int zfs_remove_max_segment;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, remove_max_segment, CTLFLAG_RWTUN,
|
||||
&zfs_remove_max_segment, 0, "Largest contiguous segment ZFS will attempt to"
|
||||
" allocate when removing a device");
|
||||
|
||||
extern int zfs_removal_suspend_progress;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, CTLFLAG_RWTUN,
|
||||
&zfs_removal_suspend_progress, 0, "Ensures certain actions can happen while"
|
||||
" in the middle of a removal");
|
||||
|
||||
|
||||
/*
|
||||
* Minimum size which forces the dynamic allocator to change
|
||||
* its allocation strategy. Once the space map cannot satisfy
|
||||
* an allocation of this size then it switches to using more
|
||||
* aggressive strategy (i.e. search by size rather than offset).
|
||||
*/
|
||||
extern uint64_t metaslab_df_alloc_threshold;
|
||||
SYSCTL_QUAD(_vfs_zfs_metaslab, OID_AUTO, df_alloc_threshold, CTLFLAG_RWTUN,
|
||||
&metaslab_df_alloc_threshold, 0,
|
||||
"Minimum size which forces the dynamic allocator to change it's allocation strategy");
|
||||
|
||||
/*
|
||||
* The minimum free space, in percent, which must be available
|
||||
* in a space map to continue allocations in a first-fit fashion.
|
||||
* Once the space map's free space drops below this level we dynamically
|
||||
* switch to using best-fit allocations.
|
||||
*/
|
||||
extern int metaslab_df_free_pct;
|
||||
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct, CTLFLAG_RWTUN,
|
||||
&metaslab_df_free_pct, 0,
|
||||
"The minimum free space, in percent, which must be available in a "
|
||||
"space map to continue allocations in a first-fit fashion");
|
||||
|
||||
/*
|
||||
* Percentage of all cpus that can be used by the metaslab taskq.
|
||||
*/
|
||||
extern int metaslab_load_pct;
|
||||
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct, CTLFLAG_RWTUN,
|
||||
&metaslab_load_pct, 0,
|
||||
"Percentage of cpus that can be used by the metaslab taskq");
|
||||
|
||||
/*
|
||||
* Max number of metaslabs per group to preload.
|
||||
*/
|
||||
extern int metaslab_preload_limit;
|
||||
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, preload_limit, CTLFLAG_RWTUN,
|
||||
&metaslab_preload_limit, 0,
|
||||
"Max number of metaslabs per group to preload");
|
||||
|
||||
/* refcount.c */
|
||||
extern int reference_tracking_enable;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, reference_tracking_enable, CTLFLAG_RDTUN,
|
||||
&reference_tracking_enable, 0,
|
||||
"Track reference holders to refcount_t objects, used mostly by ZFS");
|
||||
|
||||
/* spa.c */
|
||||
extern int zfs_ccw_retry_interval;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RWTUN,
|
||||
&zfs_ccw_retry_interval, 0,
|
||||
"Configuration cache file write, retry after failure, interval (seconds)");
|
||||
|
||||
extern uint64_t zfs_max_missing_tvds_cachefile;
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile, CTLFLAG_RWTUN,
|
||||
&zfs_max_missing_tvds_cachefile, 0,
|
||||
"allow importing pools with missing top-level vdevs in cache file");
|
||||
|
||||
extern uint64_t zfs_max_missing_tvds_scan;
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan, CTLFLAG_RWTUN,
|
||||
&zfs_max_missing_tvds_scan, 0,
|
||||
"allow importing pools with missing top-level vdevs during scan");
|
||||
|
||||
/* spa_misc.c */
|
||||
extern int zfs_flags;
|
||||
static int
|
||||
sysctl_vfs_zfs_debug_flags(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int err, val;
|
||||
|
||||
val = zfs_flags;
|
||||
err = sysctl_handle_int(oidp, &val, 0, req);
|
||||
if (err != 0 || req->newptr == NULL)
|
||||
return (err);
|
||||
|
||||
/*
|
||||
* ZFS_DEBUG_MODIFY must be enabled prior to boot so all
|
||||
* arc buffers in the system have the necessary additional
|
||||
* checksum data. However, it is safe to disable at any
|
||||
* time.
|
||||
*/
|
||||
if (!(zfs_flags & ZFS_DEBUG_MODIFY))
|
||||
val &= ~ZFS_DEBUG_MODIFY;
|
||||
zfs_flags = val;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, debugflags,
|
||||
CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, NULL, 0,
|
||||
sysctl_vfs_zfs_debug_flags, "IU", "Debug flags for ZFS testing.");
|
||||
|
||||
int
|
||||
param_set_deadman_synctime(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
unsigned long val;
|
||||
int err;
|
||||
|
||||
val = zfs_deadman_synctime_ms;
|
||||
err = sysctl_handle_long(oidp, &val, 0, req);
|
||||
if (err != 0 || req->newptr == NULL)
|
||||
return (err);
|
||||
zfs_deadman_synctime_ms = val;
|
||||
|
||||
spa_set_deadman_synctime(MSEC2NSEC(zfs_deadman_synctime_ms));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
param_set_deadman_ziotime(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
unsigned long val;
|
||||
int err;
|
||||
|
||||
val = zfs_deadman_ziotime_ms;
|
||||
err = sysctl_handle_long(oidp, &val, 0, req);
|
||||
if (err != 0 || req->newptr == NULL)
|
||||
return (err);
|
||||
zfs_deadman_ziotime_ms = val;
|
||||
|
||||
spa_set_deadman_ziotime(MSEC2NSEC(zfs_deadman_synctime_ms));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
param_set_deadman_failmode(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
char buf[16];
|
||||
int rc;
|
||||
|
||||
if (req->newptr == NULL)
|
||||
strlcpy(buf, zfs_deadman_failmode, sizeof (buf));
|
||||
|
||||
rc = sysctl_handle_string(oidp, buf, sizeof (buf), req);
|
||||
if (rc || req->newptr == NULL)
|
||||
return (rc);
|
||||
if (strcmp(buf, zfs_deadman_failmode) == 0)
|
||||
return (0);
|
||||
if (!strcmp(buf, "wait"))
|
||||
zfs_deadman_failmode = "wait";
|
||||
if (!strcmp(buf, "continue"))
|
||||
zfs_deadman_failmode = "continue";
|
||||
if (!strcmp(buf, "panic"))
|
||||
zfs_deadman_failmode = "panic";
|
||||
|
||||
return (-param_set_deadman_failmode_common(buf));
|
||||
}
|
||||
|
||||
|
||||
/* spacemap.c */
|
||||
extern int space_map_ibs;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, space_map_ibs, CTLFLAG_RWTUN,
|
||||
&space_map_ibs, 0, "Space map indirect block shift");
|
||||
|
||||
|
||||
/* vdev.c */
|
||||
#ifdef notyet
|
||||
extern uint64_t zfs_max_auto_ashift;
|
||||
extern uint64_t zfs_min_auto_ashift;
|
||||
|
||||
static int
|
||||
sysctl_vfs_zfs_max_auto_ashift(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
uint64_t val;
|
||||
int err;
|
||||
|
||||
val = zfs_max_auto_ashift;
|
||||
err = sysctl_handle_64(oidp, &val, 0, req);
|
||||
if (err != 0 || req->newptr == NULL)
|
||||
return (err);
|
||||
|
||||
if (val > ASHIFT_MAX || val < zfs_min_auto_ashift)
|
||||
return (EINVAL);
|
||||
|
||||
zfs_max_auto_ashift = val;
|
||||
|
||||
return (0);
|
||||
}
|
||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
|
||||
CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof (uint64_t),
|
||||
sysctl_vfs_zfs_max_auto_ashift, "QU",
|
||||
"Max ashift used when optimising for logical -> physical sectors size on "
|
||||
"new top-level vdevs.");
|
||||
static int
|
||||
sysctl_vfs_zfs_min_auto_ashift(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
uint64_t val;
|
||||
int err;
|
||||
|
||||
val = zfs_min_auto_ashift;
|
||||
err = sysctl_handle_64(oidp, &val, 0, req);
|
||||
if (err != 0 || req->newptr == NULL)
|
||||
return (err);
|
||||
|
||||
if (val < ASHIFT_MIN || val > zfs_max_auto_ashift)
|
||||
return (EINVAL);
|
||||
|
||||
zfs_min_auto_ashift = val;
|
||||
|
||||
return (0);
|
||||
}
|
||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
|
||||
CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof (uint64_t),
|
||||
sysctl_vfs_zfs_min_auto_ashift, "QU",
|
||||
"Min ashift used when creating new top-level vdevs.");
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Since the DTL space map of a vdev is not expected to have a lot of
|
||||
* entries, we default its block size to 4K.
|
||||
*/
|
||||
extern int zfs_vdev_dtl_sm_blksz;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, dtl_sm_blksz, CTLFLAG_RDTUN,
|
||||
&zfs_vdev_dtl_sm_blksz, 0,
|
||||
"Block size for DTL space map. Power of 2 and greater than 4096.");
|
||||
|
||||
/*
|
||||
* vdev-wide space maps that have lots of entries written to them at
|
||||
* the end of each transaction can benefit from a higher I/O bandwidth
|
||||
* (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
|
||||
*/
|
||||
extern int zfs_vdev_standard_sm_blksz;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz, CTLFLAG_RDTUN,
|
||||
&zfs_vdev_standard_sm_blksz, 0,
|
||||
"Block size for standard space map. Power of 2 and greater than 4096.");
|
||||
|
||||
extern int vdev_validate_skip;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip, CTLFLAG_RDTUN,
|
||||
&vdev_validate_skip, 0,
|
||||
"Enable to bypass vdev_validate().");
|
||||
|
||||
|
||||
/* vdev_cache.c */
|
||||
|
||||
/* vdev_mirror.c */
|
||||
/*
|
||||
* The load configuration settings below are tuned by default for
|
||||
* the case where all devices are of the same rotational type.
|
||||
*
|
||||
* If there is a mixture of rotating and non-rotating media, setting
|
||||
* non_rotating_seek_inc to 0 may well provide better results as it
|
||||
* will direct more reads to the non-rotating vdevs which are more
|
||||
* likely to have a higher performance.
|
||||
*/
|
||||
|
||||
|
||||
/* vdev_queue.c */
|
||||
/*
 * Helper macros that declare the extern zfs_vdev_<name>_{min,max}_active
 * variable and register a matching vfs.zfs.vdev sysctl for it.  Neither
 * macro is invoked in this part of the file; both are undefined again
 * right below so they cannot leak into later code.
 */
#define	ZFS_VDEV_QUEUE_KNOB_MIN(name)					\
extern uint32_t zfs_vdev_ ## name ## _min_active;			\
SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _min_active, CTLFLAG_RWTUN,\
    &zfs_vdev_ ## name ## _min_active, 0,				\
    "Initial number of I/O requests of type " #name			\
    " active for each device");

#define	ZFS_VDEV_QUEUE_KNOB_MAX(name)					\
extern uint32_t zfs_vdev_ ## name ## _max_active;			\
SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _max_active, CTLFLAG_RWTUN, \
    &zfs_vdev_ ## name ## _max_active, 0,				\
    "Maximum number of I/O requests of type " #name			\
    " active for each device");


/* Undefine the macros that were actually defined above. */
#undef ZFS_VDEV_QUEUE_KNOB_MIN
#undef ZFS_VDEV_QUEUE_KNOB_MAX
|
||||
|
||||
extern uint32_t zfs_vdev_max_active;
|
||||
SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RWTUN,
|
||||
&zfs_vdev_max_active, 0,
|
||||
"The maximum number of I/Os of all types active for each device. (LEGACY)");
|
||||
|
||||
extern int zfs_vdev_def_queue_depth;
|
||||
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, def_queue_depth, CTLFLAG_RWTUN,
|
||||
&zfs_vdev_def_queue_depth, 0,
|
||||
"Default queue depth for each allocator");
|
||||
|
||||
/*extern uint64_t zfs_multihost_history;
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, multihost_history, CTLFLAG_RWTUN,
|
||||
&zfs_multihost_history, 0,
|
||||
"Historical statistics for the last N multihost updates");*/
|
||||
|
||||
#ifdef notyet
|
||||
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, trim_on_init, CTLFLAG_RW,
|
||||
&vdev_trim_on_init, 0, "Enable/disable full vdev trim on initialisation");
|
||||
#endif
|
||||
|
||||
|
||||
/* zio.c */
|
||||
#if defined(__LP64__)
|
||||
int zio_use_uma = 1;
|
||||
#else
|
||||
int zio_use_uma = 0;
|
||||
#endif
|
||||
|
||||
SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, use_uma, CTLFLAG_RDTUN, &zio_use_uma, 0,
|
||||
"Use uma(9) for ZIO allocations");
|
||||
SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata, CTLFLAG_RDTUN, &zio_exclude_metadata, 0,
|
||||
"Exclude metadata buffers from dumps as well");
|
||||
|
||||
|
||||
int
|
||||
param_set_arc_long(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = sysctl_handle_long(oidp, arg1, 0, req);
|
||||
if (err != 0 || req->newptr == NULL)
|
||||
return (err);
|
||||
|
||||
arc_tuning_update(B_TRUE);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
param_set_arc_int(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = sysctl_handle_int(oidp, arg1, 0, req);
|
||||
if (err != 0 || req->newptr == NULL)
|
||||
return (err);
|
||||
|
||||
arc_tuning_update(B_TRUE);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
param_set_slop_shift(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int val;
|
||||
int err;
|
||||
|
||||
val = *(int *)arg1;
|
||||
|
||||
err = sysctl_handle_int(oidp, &val, 0, req);
|
||||
if (err != 0 || req->newptr == NULL)
|
||||
return (err);
|
||||
|
||||
if (val < 1 || val > 31)
|
||||
return (EINVAL);
|
||||
|
||||
*(int *)arg1 = val;
|
||||
|
||||
return (0);
|
||||
}
|
||||
@@ -0,0 +1,326 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/vdev_file.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/fm/fs/zfs.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
/*
|
||||
* Virtual device vector for files.
|
||||
*/
|
||||
|
||||
static taskq_t *vdev_file_taskq;
|
||||
|
||||
void
|
||||
vdev_file_init(void)
|
||||
{
|
||||
vdev_file_taskq = taskq_create("z_vdev_file", MAX(max_ncpus, 16),
|
||||
minclsyspri, max_ncpus, INT_MAX, 0);
|
||||
}
|
||||
|
||||
/* Destroy the taskq held in vdev_file_taskq. */
void
|
||||
vdev_file_fini(void)
|
||||
{
|
||||
taskq_destroy(vdev_file_taskq);
|
||||
}
|
||||
|
||||
/* Hold callback for file vdevs: only asserts that vd->vdev_path is non-NULL. */
static void
|
||||
vdev_file_hold(vdev_t *vd)
|
||||
{
|
||||
ASSERT(vd->vdev_path != NULL);
|
||||
}
|
||||
|
||||
/* Release callback for file vdevs: only asserts that vd->vdev_path is non-NULL. */
static void
|
||||
vdev_file_rele(vdev_t *vd)
|
||||
{
|
||||
ASSERT(vd->vdev_path != NULL);
|
||||
}
|
||||
|
||||
static mode_t
|
||||
vdev_file_open_mode(spa_mode_t spa_mode)
|
||||
{
|
||||
mode_t mode = 0;
|
||||
|
||||
if ((spa_mode & SPA_MODE_READ) && (spa_mode & SPA_MODE_WRITE)) {
|
||||
mode = O_RDWR;
|
||||
} else if (spa_mode & SPA_MODE_READ) {
|
||||
mode = O_RDONLY;
|
||||
} else if (spa_mode & SPA_MODE_WRITE) {
|
||||
mode = O_WRONLY;
|
||||
}
|
||||
|
||||
return (mode | O_LARGEFILE);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
|
||||
uint64_t *ashift)
|
||||
{
|
||||
vdev_file_t *vf;
|
||||
zfs_file_t *fp;
|
||||
zfs_file_attr_t zfa;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Rotational optimizations only make sense on block devices.
|
||||
*/
|
||||
vd->vdev_nonrot = B_TRUE;
|
||||
|
||||
/*
|
||||
* Allow TRIM on file based vdevs. This may not always be supported,
|
||||
* since it depends on your kernel version and underlying filesystem
|
||||
* type but it is always safe to attempt.
|
||||
*/
|
||||
vd->vdev_has_trim = B_TRUE;
|
||||
|
||||
/*
|
||||
* Disable secure TRIM on file based vdevs. There is no way to
|
||||
* request this behavior from the underlying filesystem.
|
||||
*/
|
||||
vd->vdev_has_securetrim = B_FALSE;
|
||||
|
||||
/*
|
||||
* We must have a pathname, and it must be absolute.
|
||||
*/
|
||||
if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
/*
|
||||
* Reopen the device if it's not currently open. Otherwise,
|
||||
* just update the physical size of the device.
|
||||
*/
|
||||
if (vd->vdev_tsd != NULL) {
|
||||
ASSERT(vd->vdev_reopening);
|
||||
vf = vd->vdev_tsd;
|
||||
goto skip_open;
|
||||
}
|
||||
|
||||
vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
|
||||
|
||||
/*
|
||||
* We always open the files from the root of the global zone, even if
|
||||
* we're in a local zone. If the user has gotten to this point, the
|
||||
* administrator has already decided that the pool should be available
|
||||
* to local zone users, so the underlying devices should be as well.
|
||||
*/
|
||||
ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
|
||||
|
||||
error = zfs_file_open(vd->vdev_path,
|
||||
vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
|
||||
if (error) {
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
|
||||
return (error);
|
||||
}
|
||||
|
||||
vf->vf_file = fp;
|
||||
|
||||
#ifdef _KERNEL
|
||||
/*
|
||||
* Make sure it's a regular file.
|
||||
*/
|
||||
if (zfs_file_getattr(fp, &zfa)) {
|
||||
return (SET_ERROR(ENODEV));
|
||||
}
|
||||
if (!S_ISREG(zfa.zfa_mode)) {
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
|
||||
return (SET_ERROR(ENODEV));
|
||||
}
|
||||
#endif
|
||||
|
||||
skip_open:
|
||||
|
||||
error = zfs_file_getattr(vf->vf_file, &zfa);
|
||||
if (error) {
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
|
||||
return (error);
|
||||
}
|
||||
|
||||
*max_psize = *psize = zfa.zfa_size;
|
||||
*ashift = SPA_MINBLOCKSHIFT;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_file_close(vdev_t *vd)
|
||||
{
|
||||
vdev_file_t *vf = vd->vdev_tsd;
|
||||
|
||||
if (vd->vdev_reopening || vf == NULL)
|
||||
return;
|
||||
|
||||
if (vf->vf_file != NULL) {
|
||||
zfs_file_close(vf->vf_file);
|
||||
}
|
||||
|
||||
vd->vdev_delayed_close = B_FALSE;
|
||||
kmem_free(vf, sizeof (vdev_file_t));
|
||||
vd->vdev_tsd = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Implements the interrupt side for file vdev types. This routine will be
|
||||
* called when the I/O completes allowing us to transfer the I/O to the
|
||||
* interrupt taskqs. For consistency, the code structure mimics disk vdev
|
||||
* types.
|
||||
*/
|
||||
/* Completion hook: forwards the zio to zio_delay_interrupt(). */
static void
|
||||
vdev_file_io_intr(zio_t *zio)
|
||||
{
|
||||
zio_delay_interrupt(zio);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_file_io_strategy(void *arg)
|
||||
{
|
||||
zio_t *zio = arg;
|
||||
vdev_t *vd = zio->io_vd;
|
||||
vdev_file_t *vf;
|
||||
void *buf;
|
||||
ssize_t resid;
|
||||
loff_t off;
|
||||
ssize_t size;
|
||||
int err;
|
||||
|
||||
off = zio->io_offset;
|
||||
size = zio->io_size;
|
||||
resid = 0;
|
||||
|
||||
vf = vd->vdev_tsd;
|
||||
|
||||
ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
|
||||
if (zio->io_type == ZIO_TYPE_READ) {
|
||||
buf = abd_borrow_buf(zio->io_abd, zio->io_size);
|
||||
err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
|
||||
abd_return_buf_copy(zio->io_abd, buf, size);
|
||||
} else {
|
||||
buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
|
||||
err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
|
||||
abd_return_buf(zio->io_abd, buf, size);
|
||||
}
|
||||
if (resid != 0 && zio->io_error == 0)
|
||||
zio->io_error = ENOSPC;
|
||||
|
||||
vdev_file_io_intr(zio);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_file_io_start(zio_t *zio)
|
||||
{
|
||||
vdev_t *vd = zio->io_vd;
|
||||
vdev_file_t *vf = vd->vdev_tsd;
|
||||
|
||||
if (zio->io_type == ZIO_TYPE_IOCTL) {
|
||||
/* XXPOLICY */
|
||||
if (!vdev_readable(vd)) {
|
||||
zio->io_error = SET_ERROR(ENXIO);
|
||||
zio_interrupt(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (zio->io_cmd) {
|
||||
case DKIOCFLUSHWRITECACHE:
|
||||
zio->io_error = zfs_file_fsync(vf->vf_file,
|
||||
O_SYNC|O_DSYNC);
|
||||
break;
|
||||
default:
|
||||
zio->io_error = SET_ERROR(ENOTSUP);
|
||||
}
|
||||
|
||||
zio_execute(zio);
|
||||
return;
|
||||
} else if (zio->io_type == ZIO_TYPE_TRIM) {
|
||||
#ifdef notyet
|
||||
int mode = 0;
|
||||
|
||||
ASSERT3U(zio->io_size, !=, 0);
|
||||
|
||||
/* XXX FreeBSD has no fallocate routine in file ops */
|
||||
zio->io_error = zfs_file_fallocate(vf->vf_file,
|
||||
mode, zio->io_offset, zio->io_size);
|
||||
#endif
|
||||
zio->io_error = SET_ERROR(ENOTSUP);
|
||||
zio_execute(zio);
|
||||
return;
|
||||
}
|
||||
ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
|
||||
zio->io_target_timestamp = zio_handle_io_delay(zio);
|
||||
|
||||
VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
|
||||
TQ_SLEEP), !=, 0);
|
||||
}
|
||||
|
||||
/* I/O-done callback for file vdevs: intentionally a no-op. */
/* ARGSUSED */
|
||||
static void
|
||||
vdev_file_io_done(zio_t *zio)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * Operations vector for file vdevs.  The initializers are positional, so
 * their order must match the member order of vdev_ops_t (declared
 * elsewhere).  NULL entries leave the corresponding operation unset.
 */
vdev_ops_t vdev_file_ops = {
|
||||
vdev_file_open,
|
||||
vdev_file_close,
|
||||
vdev_default_asize,
|
||||
vdev_file_io_start,
|
||||
vdev_file_io_done,
|
||||
NULL,
|
||||
NULL,
|
||||
vdev_file_hold,
|
||||
vdev_file_rele,
|
||||
NULL,
|
||||
vdev_default_xlate,
|
||||
VDEV_TYPE_FILE, /* name of this vdev type */
|
||||
B_TRUE /* leaf vdev */
|
||||
};
|
||||
|
||||
/*
|
||||
* From userland we access disks just like files.
|
||||
*/
|
||||
#ifndef _KERNEL
|
||||
|
||||
/*
 * Operations vector for disk vdevs in non-kernel builds.  It reuses the
 * file vdev callbacks and differs from vdev_file_ops only in the type name
 * (VDEV_TYPE_DISK).  The initializers are positional, so their order must
 * match the member order of vdev_ops_t (declared elsewhere).
 */
vdev_ops_t vdev_disk_ops = {
|
||||
vdev_file_open,
|
||||
vdev_file_close,
|
||||
vdev_default_asize,
|
||||
vdev_file_io_start,
|
||||
vdev_file_io_done,
|
||||
NULL,
|
||||
NULL,
|
||||
vdev_file_hold,
|
||||
vdev_file_rele,
|
||||
NULL,
|
||||
vdev_default_xlate,
|
||||
VDEV_TYPE_DISK, /* name of this vdev type */
|
||||
B_TRUE /* leaf vdev */
|
||||
};
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/vdev_os.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/uberblock_impl.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/metaslab_impl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dsl_scan.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
|
||||
int
|
||||
vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
zio_t *zio;
|
||||
abd_t *pad2;
|
||||
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
|
||||
int error;
|
||||
|
||||
if (size > VDEV_PAD_SIZE)
|
||||
return (EINVAL);
|
||||
|
||||
if (!vd->vdev_ops->vdev_op_leaf)
|
||||
return (ENODEV);
|
||||
if (vdev_is_dead(vd))
|
||||
return (ENXIO);
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
|
||||
|
||||
pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE);
|
||||
abd_zero(pad2, VDEV_PAD_SIZE);
|
||||
abd_copy_from_buf(pad2, buf, size);
|
||||
|
||||
retry:
|
||||
zio = zio_root(spa, NULL, NULL, flags);
|
||||
vdev_label_write(zio, vd, 0, pad2,
|
||||
offsetof(vdev_label_t, vl_pad2),
|
||||
VDEV_PAD_SIZE, NULL, NULL, flags);
|
||||
error = zio_wait(zio);
|
||||
if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
|
||||
flags |= ZIO_FLAG_TRYHARD;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
abd_free(pad2);
|
||||
return (error);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,254 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/kstat.h>
|
||||
|
||||
/*
 * One entry in the zfs_dbgmsgs list.  Entries are allocated with room for
 * the message text following the structure.
 */
typedef struct zfs_dbgmsg {
|
||||
/* linkage on the zfs_dbgmsgs list */
list_node_t zdm_node;
|
||||
/* set from gethrestime_sec() when the message is recorded */
time_t zdm_timestamp;
|
||||
/* size of this allocation in bytes; passed to kmem_free() */
int zdm_size;
|
||||
char zdm_msg[1]; /* variable length allocation */
|
||||
} zfs_dbgmsg_t;
|
||||
|
||||
/* List of recorded debug messages; new entries are appended at the tail. */
list_t zfs_dbgmsgs;
|
||||
/* Sum of zdm_size over all entries currently on zfs_dbgmsgs. */
int zfs_dbgmsg_size = 0;
|
||||
/* Held while zfs_dbgmsgs and zfs_dbgmsg_size are modified. */
kmutex_t zfs_dbgmsgs_lock;
|
||||
/* Size limit above which the oldest messages are purged. */
int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
|
||||
/* kstat created by zfs_dbgmsg_init(); NULL if creation failed. */
kstat_t *zfs_dbgmsg_kstat;
|
||||
|
||||
/*
|
||||
* Internal ZFS debug messages are enabled by default.
|
||||
*
|
||||
* # Print debug messages
|
||||
* cat /proc/spl/kstat/zfs/dbgmsg
|
||||
*
|
||||
* # Disable the kernel debug message log.
|
||||
* echo 0 > /sys/module/zfs/parameters/zfs_dbgmsg_enable
|
||||
*
|
||||
* # Clear the kernel debug message log.
|
||||
* echo 0 >/proc/spl/kstat/zfs/dbgmsg
|
||||
*/
|
||||
int zfs_dbgmsg_enable = 1;
|
||||
|
||||
/*
 * Raw-kstat header callback: writes the column-title line for the debug
 * message log into buf (at most size bytes).  Always returns 0.
 */
static int
zfs_dbgmsg_headers(char *buf, size_t size)
{
	const char *const ts_title = "timestamp";
	const char *const msg_title = "message";

	(void) snprintf(buf, size, "%-12s %-8s\n", ts_title, msg_title);

	return (0);
}
|
||||
|
||||
static int
|
||||
zfs_dbgmsg_data(char *buf, size_t size, void *data)
|
||||
{
|
||||
zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)data;
|
||||
|
||||
(void) snprintf(buf, size, "%-12llu %-s\n",
|
||||
(u_longlong_t)zdm->zdm_timestamp, zdm->zdm_msg);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void *
|
||||
zfs_dbgmsg_addr(kstat_t *ksp, loff_t n)
|
||||
{
|
||||
zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)ksp->ks_private;
|
||||
|
||||
ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
|
||||
|
||||
if (n == 0)
|
||||
ksp->ks_private = list_head(&zfs_dbgmsgs);
|
||||
else if (zdm)
|
||||
ksp->ks_private = list_next(&zfs_dbgmsgs, zdm);
|
||||
|
||||
return (ksp->ks_private);
|
||||
}
|
||||
|
||||
static void
|
||||
zfs_dbgmsg_purge(int max_size)
|
||||
{
|
||||
zfs_dbgmsg_t *zdm;
|
||||
int size;
|
||||
|
||||
ASSERT(MUTEX_HELD(&zfs_dbgmsgs_lock));
|
||||
|
||||
while (zfs_dbgmsg_size > max_size) {
|
||||
zdm = list_remove_head(&zfs_dbgmsgs);
|
||||
if (zdm == NULL)
|
||||
return;
|
||||
|
||||
size = zdm->zdm_size;
|
||||
kmem_free(zdm, size);
|
||||
zfs_dbgmsg_size -= size;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_dbgmsg_update(kstat_t *ksp, int rw)
|
||||
{
|
||||
if (rw == KSTAT_WRITE)
|
||||
zfs_dbgmsg_purge(0);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_dbgmsg_init(void)
|
||||
{
|
||||
list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
|
||||
offsetof(zfs_dbgmsg_t, zdm_node));
|
||||
mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
zfs_dbgmsg_kstat = kstat_create("zfs", 0, "dbgmsg", "misc",
|
||||
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
|
||||
if (zfs_dbgmsg_kstat) {
|
||||
zfs_dbgmsg_kstat->ks_lock = &zfs_dbgmsgs_lock;
|
||||
zfs_dbgmsg_kstat->ks_ndata = UINT32_MAX;
|
||||
zfs_dbgmsg_kstat->ks_private = NULL;
|
||||
zfs_dbgmsg_kstat->ks_update = zfs_dbgmsg_update;
|
||||
kstat_set_raw_ops(zfs_dbgmsg_kstat, zfs_dbgmsg_headers,
|
||||
zfs_dbgmsg_data, zfs_dbgmsg_addr);
|
||||
kstat_install(zfs_dbgmsg_kstat);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
zfs_dbgmsg_fini(void)
|
||||
{
|
||||
if (zfs_dbgmsg_kstat)
|
||||
kstat_delete(zfs_dbgmsg_kstat);
|
||||
/*
|
||||
* TODO - decide how to make this permanent
|
||||
*/
|
||||
#ifdef _KERNEL
|
||||
mutex_enter(&zfs_dbgmsgs_lock);
|
||||
zfs_dbgmsg_purge(0);
|
||||
mutex_exit(&zfs_dbgmsgs_lock);
|
||||
mutex_destroy(&zfs_dbgmsgs_lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
__zfs_dbgmsg(char *buf)
|
||||
{
|
||||
zfs_dbgmsg_t *zdm;
|
||||
int size;
|
||||
|
||||
DTRACE_PROBE1(zfs__dbgmsg, char *, buf);
|
||||
|
||||
size = sizeof (zfs_dbgmsg_t) + strlen(buf);
|
||||
zdm = kmem_zalloc(size, KM_SLEEP);
|
||||
zdm->zdm_size = size;
|
||||
zdm->zdm_timestamp = gethrestime_sec();
|
||||
strcpy(zdm->zdm_msg, buf);
|
||||
|
||||
mutex_enter(&zfs_dbgmsgs_lock);
|
||||
list_insert_tail(&zfs_dbgmsgs, zdm);
|
||||
zfs_dbgmsg_size += size;
|
||||
zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0));
|
||||
mutex_exit(&zfs_dbgmsgs_lock);
|
||||
}
|
||||
|
||||
void
|
||||
__set_error(const char *file, const char *func, int line, int err)
|
||||
{
|
||||
/*
|
||||
* To enable this:
|
||||
*
|
||||
* $ echo 512 >/sys/module/zfs/parameters/zfs_flags
|
||||
*/
|
||||
if (zfs_flags & ZFS_DEBUG_SET_ERROR)
|
||||
__dprintf(B_FALSE, file, func, line, "error %lu", err);
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
void
|
||||
__dprintf(boolean_t dprint, const char *file, const char *func,
|
||||
int line, const char *fmt, ...)
|
||||
{
|
||||
const char *newfile;
|
||||
va_list adx;
|
||||
size_t size;
|
||||
char *buf;
|
||||
char *nl;
|
||||
int i;
|
||||
|
||||
size = 1024;
|
||||
buf = kmem_alloc(size, KM_SLEEP);
|
||||
|
||||
/*
|
||||
* Get rid of annoying prefix to filename.
|
||||
*/
|
||||
newfile = strrchr(file, '/');
|
||||
if (newfile != NULL) {
|
||||
newfile = newfile + 1; /* Get rid of leading / */
|
||||
} else {
|
||||
newfile = file;
|
||||
}
|
||||
|
||||
i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func);
|
||||
|
||||
if (i < size) {
|
||||
va_start(adx, fmt);
|
||||
(void) vsnprintf(buf + i, size - i, fmt, adx);
|
||||
va_end(adx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get rid of trailing newline.
|
||||
*/
|
||||
nl = strrchr(buf, '\n');
|
||||
if (nl != NULL)
|
||||
*nl = '\0';
|
||||
|
||||
__zfs_dbgmsg(buf);
|
||||
|
||||
kmem_free(buf, size);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void
|
||||
zfs_dbgmsg_print(const char *tag)
|
||||
{
|
||||
zfs_dbgmsg_t *zdm;
|
||||
|
||||
(void) printf("ZFS_DBGMSG(%s):\n", tag);
|
||||
mutex_enter(&zfs_dbgmsgs_lock);
|
||||
for (zdm = list_head(&zfs_dbgmsgs); zdm;
|
||||
zdm = list_next(&zfs_dbgmsgs, zdm))
|
||||
(void) printf("%s\n", zdm->zdm_msg);
|
||||
mutex_exit(&zfs_dbgmsgs_lock);
|
||||
}
|
||||
#endif /* _KERNEL */
|
||||
|
||||
/*
 * Kernel builds only: register zfs_dbgmsg_enable and zfs_dbgmsg_maxsize as
 * int module parameters with mode 0644.
 */
#ifdef _KERNEL
|
||||
module_param(zfs_dbgmsg_enable, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log");
|
||||
|
||||
module_param(zfs_dbgmsg_maxsize, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_dbgmsg_maxsize, "Maximum ZFS debug log size");
|
||||
#endif
|
||||
@@ -0,0 +1,961 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
||||
* Copyright 2017 Nexenta Systems, Inc.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/extdirent.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/cmn_err.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/unistd.h>
|
||||
#include <sys/sunddi.h>
|
||||
#include <sys/random.h>
|
||||
#include <sys/policy.h>
|
||||
#include <sys/condvar.h>
|
||||
#include <sys/callb.h>
|
||||
#include <sys/smp.h>
|
||||
#include <sys/zfs_dir.h>
|
||||
#include <sys/zfs_acl.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/atomic.h>
|
||||
#include <sys/zfs_ctldir.h>
|
||||
#include <sys/zfs_fuid.h>
|
||||
#include <sys/sa.h>
|
||||
#include <sys/zfs_sa.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dsl_dir.h>
|
||||
|
||||
/*
|
||||
* zfs_match_find() is used by zfs_dirent_lookup() to peform zap lookups
|
||||
* of names after deciding which is the appropriate lookup interface.
|
||||
*/
|
||||
static int
|
||||
zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, const char *name,
|
||||
matchtype_t mt, uint64_t *zoid)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (zfsvfs->z_norm) {
|
||||
|
||||
/*
|
||||
* In the non-mixed case we only expect there would ever
|
||||
* be one match, but we need to use the normalizing lookup.
|
||||
*/
|
||||
error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
|
||||
zoid, mt, NULL, 0, NULL);
|
||||
} else {
|
||||
error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
|
||||
}
|
||||
*zoid = ZFS_DIRENT_OBJ(*zoid);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Look up a directory entry under a locked vnode.
|
||||
* dvp being locked gives us a guarantee that there are no concurrent
|
||||
* modification of the directory and, thus, if a node can be found in
|
||||
* the directory, then it must not be unlinked.
|
||||
*
|
||||
* Input arguments:
|
||||
* dzp - znode for directory
|
||||
* name - name of entry to lock
|
||||
* flag - ZNEW: if the entry already exists, fail with EEXIST.
|
||||
* ZEXISTS: if the entry does not exist, fail with ENOENT.
|
||||
* ZXATTR: we want dzp's xattr directory
|
||||
*
|
||||
* Output arguments:
|
||||
* zpp - pointer to the znode for the entry (NULL if there isn't one)
|
||||
*
|
||||
* Return value: 0 on success or errno on failure.
|
||||
*
|
||||
* NOTE: Always checks for, and rejects, '.' and '..'.
|
||||
*/
|
||||
int
|
||||
zfs_dirent_lookup(znode_t *dzp, const char *name, znode_t **zpp, int flag)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
|
||||
znode_t *zp;
|
||||
matchtype_t mt = 0;
|
||||
uint64_t zoid;
|
||||
int error = 0;
|
||||
|
||||
if (zfsvfs->z_replay == B_FALSE)
|
||||
ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
|
||||
|
||||
*zpp = NULL;
|
||||
|
||||
/*
|
||||
* Verify that we are not trying to lock '.', '..', or '.zfs'
|
||||
*/
|
||||
if (name[0] == '.' &&
|
||||
(((name[1] == '\0') || (name[1] == '.' && name[2] == '\0')) ||
|
||||
(zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)))
|
||||
return (SET_ERROR(EEXIST));
|
||||
|
||||
/*
|
||||
* Case sensitivity and normalization preferences are set when
|
||||
* the file system is created. These are stored in the
|
||||
* zfsvfs->z_case and zfsvfs->z_norm fields. These choices
|
||||
* affect how we perform zap lookups.
|
||||
*
|
||||
* When matching we may need to normalize & change case according to
|
||||
* FS settings.
|
||||
*
|
||||
* Note that a normalized match is necessary for a case insensitive
|
||||
* filesystem when the lookup request is not exact because normalization
|
||||
* can fold case independent of normalizing code point sequences.
|
||||
*
|
||||
* See the table above zfs_dropname().
|
||||
*/
|
||||
if (zfsvfs->z_norm != 0) {
|
||||
mt = MT_NORMALIZE;
|
||||
|
||||
/*
|
||||
* Determine if the match needs to honor the case specified in
|
||||
* lookup, and if so keep track of that so that during
|
||||
* normalization we don't fold case.
|
||||
*/
|
||||
if (zfsvfs->z_case == ZFS_CASE_MIXED) {
|
||||
mt |= MT_MATCH_CASE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Only look in or update the DNLC if we are looking for the
|
||||
* name on a file system that does not require normalization
|
||||
* or case folding. We can also look there if we happen to be
|
||||
* on a non-normalizing, mixed sensitivity file system IF we
|
||||
* are looking for the exact name.
|
||||
*
|
||||
* NB: we do not need to worry about this flag for ZFS_CASE_SENSITIVE
|
||||
* because in that case MT_EXACT and MT_FIRST should produce exactly
|
||||
* the same result.
|
||||
*/
|
||||
|
||||
if (dzp->z_unlinked && !(flag & ZXATTR))
|
||||
return (ENOENT);
|
||||
if (flag & ZXATTR) {
|
||||
error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
|
||||
sizeof (zoid));
|
||||
if (error == 0)
|
||||
error = (zoid == 0 ? ENOENT : 0);
|
||||
} else {
|
||||
error = zfs_match_find(zfsvfs, dzp, name, mt, &zoid);
|
||||
}
|
||||
if (error) {
|
||||
if (error != ENOENT || (flag & ZEXISTS)) {
|
||||
return (error);
|
||||
}
|
||||
} else {
|
||||
if (flag & ZNEW) {
|
||||
return (SET_ERROR(EEXIST));
|
||||
}
|
||||
error = zfs_zget(zfsvfs, zoid, &zp);
|
||||
if (error)
|
||||
return (error);
|
||||
ASSERT(!zp->z_unlinked);
|
||||
*zpp = zp;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_dd_lookup(znode_t *dzp, znode_t **zpp)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
|
||||
znode_t *zp;
|
||||
uint64_t parent;
|
||||
int error;
|
||||
|
||||
if (zfsvfs->z_replay == B_FALSE)
|
||||
ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
|
||||
ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock));
|
||||
|
||||
if (dzp->z_unlinked)
|
||||
return (ENOENT);
|
||||
|
||||
if ((error = sa_lookup(dzp->z_sa_hdl,
|
||||
SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
|
||||
return (error);
|
||||
|
||||
error = zfs_zget(zfsvfs, parent, &zp);
|
||||
if (error == 0)
|
||||
*zpp = zp;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_dirlook(znode_t *dzp, const char *name, znode_t **zpp)
|
||||
{
|
||||
zfsvfs_t *zfsvfs __unused = dzp->z_zfsvfs;
|
||||
znode_t *zp = NULL;
|
||||
int error = 0;
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
if (zfsvfs->z_replay == B_FALSE)
|
||||
ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
|
||||
ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock));
|
||||
#endif
|
||||
if (dzp->z_unlinked)
|
||||
return (SET_ERROR(ENOENT));
|
||||
|
||||
if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
|
||||
*zpp = dzp;
|
||||
} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
|
||||
error = zfs_dd_lookup(dzp, &zp);
|
||||
if (error == 0)
|
||||
*zpp = zp;
|
||||
} else {
|
||||
error = zfs_dirent_lookup(dzp, name, &zp, ZEXISTS);
|
||||
if (error == 0) {
|
||||
dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
|
||||
*zpp = zp;
|
||||
}
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* unlinked Set (formerly known as the "delete queue") Error Handling
|
||||
*
|
||||
* When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
|
||||
* don't specify the name of the entry that we will be manipulating. We
|
||||
* also fib and say that we won't be adding any new entries to the
|
||||
* unlinked set, even though we might (this is to lower the minimum file
|
||||
* size that can be deleted in a full filesystem). So on the small
|
||||
* chance that the nlink list is using a fat zap (ie. has more than
|
||||
* 2000 entries), we *may* not pre-read a block that's needed.
|
||||
* Therefore it is remotely possible for some of the assertions
|
||||
* regarding the unlinked set below to fail due to i/o error. On a
|
||||
* nondebug system, this will result in the space being leaked.
|
||||
*/
|
||||
/*
 * Add zp's object number to the file system's unlinked set as part of
 * transaction tx.  zp must already be marked unlinked with a link count of
 * zero.  Failure of the zap insertion is fatal (VERIFY).
 */
void
|
||||
zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
|
||||
ASSERT(zp->z_unlinked);
|
||||
ASSERT(zp->z_links == 0);
|
||||
|
||||
VERIFY3U(0, ==,
|
||||
zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean up any znodes that had no links when we either crashed or
|
||||
* (force) umounted the file system.
|
||||
*/
|
||||
void
|
||||
zfs_unlinked_drain(zfsvfs_t *zfsvfs)
|
||||
{
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t zap;
|
||||
dmu_object_info_t doi;
|
||||
znode_t *zp;
|
||||
dmu_tx_t *tx;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Interate over the contents of the unlinked set.
|
||||
*/
|
||||
for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
|
||||
zap_cursor_retrieve(&zc, &zap) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
|
||||
/*
|
||||
* See what kind of object we have in list
|
||||
*/
|
||||
|
||||
error = dmu_object_info(zfsvfs->z_os,
|
||||
zap.za_first_integer, &doi);
|
||||
if (error != 0)
|
||||
continue;
|
||||
|
||||
ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
|
||||
(doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
|
||||
/*
|
||||
* We need to re-mark these list entries for deletion,
|
||||
* so we pull them back into core and set zp->z_unlinked.
|
||||
*/
|
||||
error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
|
||||
|
||||
/*
|
||||
* We may pick up znodes that are already marked for deletion.
|
||||
* This could happen during the purge of an extended attribute
|
||||
* directory. All we need to do is skip over them, since they
|
||||
* are already in the system marked z_unlinked.
|
||||
*/
|
||||
if (error != 0)
|
||||
continue;
|
||||
|
||||
vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY);
|
||||
|
||||
/*
|
||||
* Due to changes in zfs_rmnode we need to make sure the
|
||||
* link count is set to zero here.
|
||||
*/
|
||||
if (zp->z_links != 0) {
|
||||
tx = dmu_tx_create(zfsvfs->z_os);
|
||||
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
||||
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (error != 0) {
|
||||
dmu_tx_abort(tx);
|
||||
vput(ZTOV(zp));
|
||||
continue;
|
||||
}
|
||||
zp->z_links = 0;
|
||||
VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
|
||||
&zp->z_links, sizeof (zp->z_links), tx));
|
||||
dmu_tx_commit(tx);
|
||||
}
|
||||
|
||||
zp->z_unlinked = B_TRUE;
|
||||
vput(ZTOV(zp));
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete the entire contents of a directory. Return a count
|
||||
* of the number of entries that could not be deleted. If we encounter
|
||||
* an error, return a count of at least one so that the directory stays
|
||||
* in the unlinked set.
|
||||
*
|
||||
* NOTE: this function assumes that the directory is inactive,
|
||||
* so there is no need to lock its entries before deletion.
|
||||
* Also, it assumes the directory contents is *only* regular
|
||||
* files.
|
||||
*/
|
||||
static int
|
||||
zfs_purgedir(znode_t *dzp)
|
||||
{
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t zap;
|
||||
znode_t *xzp;
|
||||
dmu_tx_t *tx;
|
||||
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
|
||||
int skipped = 0;
|
||||
int error;
|
||||
|
||||
for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
|
||||
(error = zap_cursor_retrieve(&zc, &zap)) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
error = zfs_zget(zfsvfs,
|
||||
ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
|
||||
if (error) {
|
||||
skipped += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
|
||||
ASSERT((ZTOV(xzp)->v_type == VREG) ||
|
||||
(ZTOV(xzp)->v_type == VLNK));
|
||||
|
||||
tx = dmu_tx_create(zfsvfs->z_os);
|
||||
dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
|
||||
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
|
||||
dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
|
||||
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
|
||||
/* Is this really needed ? */
|
||||
zfs_sa_upgrade_txholds(tx, xzp);
|
||||
dmu_tx_mark_netfree(tx);
|
||||
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (error) {
|
||||
dmu_tx_abort(tx);
|
||||
vput(ZTOV(xzp));
|
||||
skipped += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
error = zfs_link_destroy(dzp, zap.za_name, xzp, tx, 0, NULL);
|
||||
if (error)
|
||||
skipped += 1;
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
vput(ZTOV(xzp));
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
if (error != ENOENT)
|
||||
skipped += 1;
|
||||
return (skipped);
|
||||
}
|
||||
|
||||
extern taskq_t *zfsvfs_taskq;
|
||||
|
||||
void
|
||||
zfs_rmnode(znode_t *zp)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
objset_t *os = zfsvfs->z_os;
|
||||
dmu_tx_t *tx;
|
||||
uint64_t acl_obj;
|
||||
uint64_t xattr_obj;
|
||||
uint64_t count;
|
||||
int error;
|
||||
|
||||
ASSERT(zp->z_links == 0);
|
||||
if (zfsvfs->z_replay == B_FALSE)
|
||||
ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
|
||||
|
||||
/*
|
||||
* If this is an attribute directory, purge its contents.
|
||||
*/
|
||||
if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
|
||||
(zp->z_pflags & ZFS_XATTR)) {
|
||||
if (zfs_purgedir(zp) != 0) {
|
||||
/*
|
||||
* Not enough space to delete some xattrs.
|
||||
* Leave it in the unlinked set.
|
||||
*/
|
||||
zfs_znode_dmu_fini(zp);
|
||||
zfs_znode_free(zp);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Free up all the data in the file. We don't do this for
|
||||
* XATTR directories because we need truncate and remove to be
|
||||
* in the same tx, like in zfs_znode_delete(). Otherwise, if
|
||||
* we crash here we'll end up with an inconsistent truncated
|
||||
* zap object in the delete queue. Note a truncated file is
|
||||
* harmless since it only contains user data.
|
||||
*/
|
||||
error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
|
||||
if (error) {
|
||||
/*
|
||||
* Not enough space or we were interrupted by unmount.
|
||||
* Leave the file in the unlinked set.
|
||||
*/
|
||||
zfs_znode_dmu_fini(zp);
|
||||
zfs_znode_free(zp);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If the file has extended attributes, we're going to unlink
|
||||
* the xattr dir.
|
||||
*/
|
||||
error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
|
||||
&xattr_obj, sizeof (xattr_obj));
|
||||
if (error)
|
||||
xattr_obj = 0;
|
||||
|
||||
acl_obj = zfs_external_acl(zp);
|
||||
|
||||
/*
|
||||
* Set up the final transaction.
|
||||
*/
|
||||
tx = dmu_tx_create(os);
|
||||
dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
|
||||
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
|
||||
if (xattr_obj)
|
||||
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
|
||||
if (acl_obj)
|
||||
dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
|
||||
|
||||
zfs_sa_upgrade_txholds(tx, zp);
|
||||
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (error) {
|
||||
/*
|
||||
* Not enough space to delete the file. Leave it in the
|
||||
* unlinked set, leaking it until the fs is remounted (at
|
||||
* which point we'll call zfs_unlinked_drain() to process it).
|
||||
*/
|
||||
dmu_tx_abort(tx);
|
||||
zfs_znode_dmu_fini(zp);
|
||||
zfs_znode_free(zp);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* FreeBSD's implemention of zfs_zget requires a vnode to back it.
|
||||
* This means that we could end up calling into getnewvnode while
|
||||
* calling zfs_rmnode as a result of a prior call to getnewvnode
|
||||
* trying to clear vnodes out of the cache. If this repeats we can
|
||||
* recurse enough that we overflow our stack. To avoid this, we
|
||||
* avoid calling zfs_zget on the xattr znode and instead simply add
|
||||
* it to the unlinked set and schedule a call to zfs_unlinked_drain.
|
||||
*/
|
||||
if (xattr_obj) {
|
||||
/* Add extended attribute directory to the unlinked set. */
|
||||
VERIFY3U(0, ==,
|
||||
zap_add_int(os, zfsvfs->z_unlinkedobj, xattr_obj, tx));
|
||||
}
|
||||
|
||||
mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
|
||||
|
||||
/* Remove this znode from the unlinked set */
|
||||
VERIFY3U(0, ==,
|
||||
zap_remove_int(os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
|
||||
|
||||
if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) {
|
||||
cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv);
|
||||
}
|
||||
|
||||
mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
|
||||
|
||||
zfs_znode_delete(zp, tx);
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
if (xattr_obj) {
|
||||
/*
|
||||
* We're using the FreeBSD taskqueue API here instead of
|
||||
* the Solaris taskq API since the FreeBSD API allows for a
|
||||
* task to be enqueued multiple times but executed once.
|
||||
*/
|
||||
taskqueue_enqueue(zfsvfs_taskq->tq_queue,
|
||||
&zfsvfs->z_unlinked_drain_task);
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
zfs_dirent(znode_t *zp, uint64_t mode)
|
||||
{
|
||||
uint64_t de = zp->z_id;
|
||||
|
||||
if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE)
|
||||
de |= IFTODT(mode) << 60;
|
||||
return (de);
|
||||
}
|
||||
|
||||
/*
|
||||
* Link zp into dzp. Can only fail if zp has been unlinked.
|
||||
*/
|
||||
int
|
||||
zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
|
||||
int flag)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
vnode_t *vp = ZTOV(zp);
|
||||
uint64_t value;
|
||||
int zp_is_dir = (vp->v_type == VDIR);
|
||||
sa_bulk_attr_t bulk[5];
|
||||
uint64_t mtime[2], ctime[2];
|
||||
int count = 0;
|
||||
int error;
|
||||
|
||||
if (zfsvfs->z_replay == B_FALSE) {
|
||||
ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
|
||||
ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
|
||||
}
|
||||
if (zp_is_dir) {
|
||||
if (dzp->z_links >= ZFS_LINK_MAX)
|
||||
return (SET_ERROR(EMLINK));
|
||||
}
|
||||
if (!(flag & ZRENAMING)) {
|
||||
if (zp->z_unlinked) { /* no new links to unlinked zp */
|
||||
ASSERT(!(flag & (ZNEW | ZEXISTS)));
|
||||
return (SET_ERROR(ENOENT));
|
||||
}
|
||||
if (zp->z_links >= ZFS_LINK_MAX - zp_is_dir) {
|
||||
return (SET_ERROR(EMLINK));
|
||||
}
|
||||
zp->z_links++;
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
|
||||
&zp->z_links, sizeof (zp->z_links));
|
||||
|
||||
} else {
|
||||
ASSERT(zp->z_unlinked == 0);
|
||||
}
|
||||
value = zfs_dirent(zp, zp->z_mode);
|
||||
error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, name,
|
||||
8, 1, &value, tx);
|
||||
|
||||
/*
|
||||
* zap_add could fail to add the entry if it exceeds the capacity of the
|
||||
* leaf-block and zap_leaf_split() failed to help.
|
||||
* The caller of this routine is responsible for failing the transaction
|
||||
* which will rollback the SA updates done above.
|
||||
*/
|
||||
if (error != 0) {
|
||||
if (!(flag & ZRENAMING) && !(flag & ZNEW))
|
||||
zp->z_links--;
|
||||
return (error);
|
||||
}
|
||||
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
|
||||
&dzp->z_id, sizeof (dzp->z_id));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
|
||||
&zp->z_pflags, sizeof (zp->z_pflags));
|
||||
|
||||
if (!(flag & ZNEW)) {
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
|
||||
ctime, sizeof (ctime));
|
||||
zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
|
||||
ctime);
|
||||
}
|
||||
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
|
||||
ASSERT0(error);
|
||||
|
||||
dzp->z_size++;
|
||||
dzp->z_links += zp_is_dir;
|
||||
count = 0;
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
|
||||
&dzp->z_size, sizeof (dzp->z_size));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
|
||||
&dzp->z_links, sizeof (dzp->z_links));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
|
||||
mtime, sizeof (mtime));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
|
||||
ctime, sizeof (ctime));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
|
||||
&dzp->z_pflags, sizeof (dzp->z_pflags));
|
||||
zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
|
||||
error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
|
||||
ASSERT0(error);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* The match type in the code for this function should conform to:
|
||||
*
|
||||
* ------------------------------------------------------------------------
|
||||
* fs type | z_norm | lookup type | match type
|
||||
* ---------|-------------|-------------|----------------------------------
|
||||
* CS !norm | 0 | 0 | 0 (exact)
|
||||
* CS norm | formX | 0 | MT_NORMALIZE
|
||||
* CI !norm | upper | !ZCIEXACT | MT_NORMALIZE
|
||||
* CI !norm | upper | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
|
||||
* CI norm | upper|formX | !ZCIEXACT | MT_NORMALIZE
|
||||
* CI norm | upper|formX | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
|
||||
* CM !norm | upper | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
|
||||
* CM !norm | upper | ZCILOOK | MT_NORMALIZE
|
||||
* CM norm | upper|formX | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
|
||||
* CM norm | upper|formX | ZCILOOK | MT_NORMALIZE
|
||||
*
|
||||
* Abbreviations:
|
||||
* CS = Case Sensitive, CI = Case Insensitive, CM = Case Mixed
|
||||
* upper = case folding set by fs type on creation (U8_TEXTPREP_TOUPPER)
|
||||
* formX = unicode normalization form set on fs creation
|
||||
*/
|
||||
static int
|
||||
zfs_dropname(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
|
||||
int flag)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (zp->z_zfsvfs->z_norm) {
|
||||
matchtype_t mt = MT_NORMALIZE;
|
||||
|
||||
if (zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) {
|
||||
mt |= MT_MATCH_CASE;
|
||||
}
|
||||
|
||||
error = zap_remove_norm(zp->z_zfsvfs->z_os, dzp->z_id,
|
||||
name, mt, tx);
|
||||
} else {
|
||||
error = zap_remove(zp->z_zfsvfs->z_os, dzp->z_id, name, tx);
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlink zp from dzp, and mark zp for deletion if this was the last link.
|
||||
* Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
|
||||
* If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
|
||||
* If it's non-NULL, we use it to indicate whether the znode needs deletion,
|
||||
* and it's the caller's job to do it.
|
||||
*/
|
||||
int
|
||||
zfs_link_destroy(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
|
||||
int flag, boolean_t *unlinkedp)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
|
||||
vnode_t *vp = ZTOV(zp);
|
||||
int zp_is_dir = (vp->v_type == VDIR);
|
||||
boolean_t unlinked = B_FALSE;
|
||||
sa_bulk_attr_t bulk[5];
|
||||
uint64_t mtime[2], ctime[2];
|
||||
int count = 0;
|
||||
int error;
|
||||
|
||||
if (zfsvfs->z_replay == B_FALSE) {
|
||||
ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
|
||||
ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
|
||||
}
|
||||
if (!(flag & ZRENAMING)) {
|
||||
|
||||
if (zp_is_dir && !zfs_dirempty(zp))
|
||||
return (SET_ERROR(ENOTEMPTY));
|
||||
|
||||
/*
|
||||
* If we get here, we are going to try to remove the object.
|
||||
* First try removing the name from the directory; if that
|
||||
* fails, return the error.
|
||||
*/
|
||||
error = zfs_dropname(dzp, name, zp, tx, flag);
|
||||
if (error != 0) {
|
||||
return (error);
|
||||
}
|
||||
|
||||
if (zp->z_links <= zp_is_dir) {
|
||||
zfs_panic_recover("zfs: link count on vnode %p is %u, "
|
||||
"should be at least %u", zp->z_vnode,
|
||||
(int)zp->z_links,
|
||||
zp_is_dir + 1);
|
||||
zp->z_links = zp_is_dir + 1;
|
||||
}
|
||||
if (--zp->z_links == zp_is_dir) {
|
||||
zp->z_unlinked = B_TRUE;
|
||||
zp->z_links = 0;
|
||||
unlinked = B_TRUE;
|
||||
} else {
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
|
||||
NULL, &ctime, sizeof (ctime));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
|
||||
NULL, &zp->z_pflags, sizeof (zp->z_pflags));
|
||||
zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
|
||||
ctime);
|
||||
}
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
|
||||
NULL, &zp->z_links, sizeof (zp->z_links));
|
||||
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
|
||||
count = 0;
|
||||
ASSERT0(error);
|
||||
} else {
|
||||
ASSERT(zp->z_unlinked == 0);
|
||||
error = zfs_dropname(dzp, name, zp, tx, flag);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
}
|
||||
|
||||
dzp->z_size--; /* one dirent removed */
|
||||
dzp->z_links -= zp_is_dir; /* ".." link from zp */
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
|
||||
NULL, &dzp->z_links, sizeof (dzp->z_links));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
|
||||
NULL, &dzp->z_size, sizeof (dzp->z_size));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
|
||||
NULL, ctime, sizeof (ctime));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
|
||||
NULL, mtime, sizeof (mtime));
|
||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
|
||||
NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
|
||||
zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
|
||||
error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
|
||||
ASSERT0(error);
|
||||
|
||||
if (unlinkedp != NULL)
|
||||
*unlinkedp = unlinked;
|
||||
else if (unlinked)
|
||||
zfs_unlinked_add(zp, tx);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Indicate whether the directory is empty.
|
||||
*/
|
||||
boolean_t
|
||||
zfs_dirempty(znode_t *dzp)
|
||||
{
|
||||
return (dzp->z_size == 2);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xvpp, cred_t *cr)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
znode_t *xzp;
|
||||
dmu_tx_t *tx;
|
||||
int error;
|
||||
zfs_acl_ids_t acl_ids;
|
||||
boolean_t fuid_dirtied;
|
||||
uint64_t parent __unused;
|
||||
|
||||
*xvpp = NULL;
|
||||
|
||||
if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
|
||||
&acl_ids)) != 0)
|
||||
return (error);
|
||||
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, 0)) {
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
return (SET_ERROR(EDQUOT));
|
||||
}
|
||||
|
||||
getnewvnode_reserve_();
|
||||
|
||||
tx = dmu_tx_create(zfsvfs->z_os);
|
||||
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
|
||||
ZFS_SA_BASE_ATTR_SIZE);
|
||||
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
|
||||
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
|
||||
fuid_dirtied = zfsvfs->z_fuid_dirty;
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_txhold(zfsvfs, tx);
|
||||
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (error) {
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
dmu_tx_abort(tx);
|
||||
getnewvnode_drop_reserve();
|
||||
return (error);
|
||||
}
|
||||
zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
|
||||
#ifdef DEBUG
|
||||
error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
|
||||
&parent, sizeof (parent));
|
||||
ASSERT(error == 0 && parent == zp->z_id);
|
||||
#endif
|
||||
|
||||
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
|
||||
sizeof (xzp->z_id), tx));
|
||||
|
||||
(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
|
||||
xzp, "", NULL, acl_ids.z_fuidp, vap);
|
||||
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
getnewvnode_drop_reserve();
|
||||
|
||||
*xvpp = xzp;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return a znode for the extended attribute directory for zp.
|
||||
* ** If the directory does not already exist, it is created **
|
||||
*
|
||||
* IN: zp - znode to obtain attribute directory from
|
||||
* cr - credentials of caller
|
||||
* flags - flags from the VOP_LOOKUP call
|
||||
*
|
||||
* OUT: xzpp - pointer to extended attribute znode
|
||||
*
|
||||
* RETURN: 0 on success
|
||||
* error number on failure
|
||||
*/
|
||||
int
|
||||
zfs_get_xattrdir(znode_t *zp, znode_t **xzpp, cred_t *cr, int flags)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
znode_t *xzp;
|
||||
vattr_t va;
|
||||
int error;
|
||||
top:
|
||||
error = zfs_dirent_lookup(zp, "", &xzp, ZXATTR);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (xzp != NULL) {
|
||||
*xzpp = xzp;
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
if (!(flags & CREATE_XATTR_DIR))
|
||||
return (SET_ERROR(ENOATTR));
|
||||
|
||||
if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
|
||||
return (SET_ERROR(EROFS));
|
||||
}
|
||||
|
||||
/*
|
||||
* The ability to 'create' files in an attribute
|
||||
* directory comes from the write_xattr permission on the base file.
|
||||
*
|
||||
* The ability to 'search' an attribute directory requires
|
||||
* read_xattr permission on the base file.
|
||||
*
|
||||
* Once in a directory the ability to read/write attributes
|
||||
* is controlled by the permissions on the attribute file.
|
||||
*/
|
||||
va.va_mask = AT_MODE | AT_UID | AT_GID;
|
||||
va.va_type = VDIR;
|
||||
va.va_mode = S_IFDIR | S_ISVTX | 0777;
|
||||
zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
|
||||
|
||||
error = zfs_make_xattrdir(zp, &va, xzpp, cr);
|
||||
|
||||
if (error == ERESTART) {
|
||||
/* NB: we already did dmu_tx_wait() if necessary */
|
||||
goto top;
|
||||
}
|
||||
if (error == 0)
|
||||
VOP_UNLOCK1(ZTOV(*xzpp));
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Decide whether it is okay to remove within a sticky directory.
|
||||
*
|
||||
* In sticky directories, write access is not sufficient;
|
||||
* you can remove entries from a directory only if:
|
||||
*
|
||||
* you own the directory,
|
||||
* you own the entry,
|
||||
* the entry is a plain file and you have write access,
|
||||
* or you are privileged (checked in secpolicy...).
|
||||
*
|
||||
* The function returns 0 if remove access is granted.
|
||||
*/
|
||||
int
|
||||
zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
|
||||
{
|
||||
uid_t uid;
|
||||
uid_t downer;
|
||||
uid_t fowner;
|
||||
zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
|
||||
|
||||
if (zdp->z_zfsvfs->z_replay)
|
||||
return (0);
|
||||
|
||||
if ((zdp->z_mode & S_ISVTX) == 0)
|
||||
return (0);
|
||||
|
||||
downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER);
|
||||
fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER);
|
||||
|
||||
if ((uid = crgetuid(cr)) == downer || uid == fowner ||
|
||||
(ZTOV(zp)->v_type == VREG &&
|
||||
zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0))
|
||||
return (0);
|
||||
else
|
||||
return (secpolicy_vnode_remove(ZTOV(zp), cr));
|
||||
}
|
||||
@@ -0,0 +1,309 @@
|
||||
/*
|
||||
* Copyright (c) 2020 iXsystems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dmu_impl.h>
|
||||
#include <sys/dmu_recv.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/dbuf.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_traverse.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/dsl_dir.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zfs_file.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
int
|
||||
zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
|
||||
{
|
||||
struct thread *td;
|
||||
int rc, fd;
|
||||
|
||||
td = curthread;
|
||||
pwd_ensure_dirs();
|
||||
/* 12.x doesn't take a const char * */
|
||||
rc = kern_openat(td, AT_FDCWD, __DECONST(char *, path),
|
||||
UIO_SYSSPACE, flags, mode);
|
||||
if (rc)
|
||||
return (SET_ERROR(rc));
|
||||
fd = td->td_retval[0];
|
||||
td->td_retval[0] = 0;
|
||||
if (fget(curthread, fd, &cap_no_rights, fpp))
|
||||
kern_close(td, fd);
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_file_close(zfs_file_t *fp)
|
||||
{
|
||||
fo_close(fp, curthread);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_file_write_impl(zfs_file_t *fp, const void *buf, size_t count, loff_t *offp,
|
||||
ssize_t *resid)
|
||||
{
|
||||
ssize_t rc;
|
||||
struct uio auio;
|
||||
struct thread *td;
|
||||
struct iovec aiov;
|
||||
|
||||
td = curthread;
|
||||
aiov.iov_base = (void *)(uintptr_t)buf;
|
||||
aiov.iov_len = count;
|
||||
auio.uio_iov = &aiov;
|
||||
auio.uio_iovcnt = 1;
|
||||
auio.uio_segflg = UIO_SYSSPACE;
|
||||
auio.uio_resid = count;
|
||||
auio.uio_rw = UIO_WRITE;
|
||||
auio.uio_td = td;
|
||||
auio.uio_offset = *offp;
|
||||
|
||||
if ((fp->f_flag & FWRITE) == 0)
|
||||
return (SET_ERROR(EBADF));
|
||||
|
||||
if (fp->f_type == DTYPE_VNODE)
|
||||
bwillwrite();
|
||||
|
||||
rc = fo_write(fp, &auio, td->td_ucred, FOF_OFFSET, td);
|
||||
if (rc)
|
||||
return (SET_ERROR(rc));
|
||||
if (resid)
|
||||
*resid = auio.uio_resid;
|
||||
else if (auio.uio_resid)
|
||||
return (SET_ERROR(EIO));
|
||||
*offp += count - auio.uio_resid;
|
||||
return (rc);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
|
||||
{
|
||||
loff_t off = fp->f_offset;
|
||||
ssize_t rc;
|
||||
|
||||
rc = zfs_file_write_impl(fp, buf, count, &off, resid);
|
||||
if (rc == 0)
|
||||
fp->f_offset = off;
|
||||
|
||||
return (SET_ERROR(rc));
|
||||
}
|
||||
|
||||
int
|
||||
zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
|
||||
ssize_t *resid)
|
||||
{
|
||||
return (zfs_file_write_impl(fp, buf, count, &off, resid));
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_file_read_impl(zfs_file_t *fp, void *buf, size_t count, loff_t *offp,
|
||||
ssize_t *resid)
|
||||
{
|
||||
ssize_t rc;
|
||||
struct uio auio;
|
||||
struct thread *td;
|
||||
struct iovec aiov;
|
||||
|
||||
td = curthread;
|
||||
aiov.iov_base = (void *)(uintptr_t)buf;
|
||||
aiov.iov_len = count;
|
||||
auio.uio_iov = &aiov;
|
||||
auio.uio_iovcnt = 1;
|
||||
auio.uio_segflg = UIO_SYSSPACE;
|
||||
auio.uio_resid = count;
|
||||
auio.uio_rw = UIO_READ;
|
||||
auio.uio_td = td;
|
||||
auio.uio_offset = *offp;
|
||||
|
||||
if ((fp->f_flag & FREAD) == 0)
|
||||
return (SET_ERROR(EBADF));
|
||||
|
||||
rc = fo_read(fp, &auio, td->td_ucred, FOF_OFFSET, td);
|
||||
if (rc)
|
||||
return (SET_ERROR(rc));
|
||||
*resid = auio.uio_resid;
|
||||
*offp += count - auio.uio_resid;
|
||||
return (SET_ERROR(0));
|
||||
}
|
||||
|
||||
int
|
||||
zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
|
||||
{
|
||||
loff_t off = fp->f_offset;
|
||||
ssize_t rc;
|
||||
|
||||
rc = zfs_file_read_impl(fp, buf, count, &off, resid);
|
||||
if (rc == 0)
|
||||
fp->f_offset = off;
|
||||
return (rc);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
|
||||
ssize_t *resid)
|
||||
{
|
||||
return (zfs_file_read_impl(fp, buf, count, &off, resid));
|
||||
}
|
||||
|
||||
int
|
||||
zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
|
||||
{
|
||||
int rc;
|
||||
struct thread *td;
|
||||
|
||||
td = curthread;
|
||||
if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0)
|
||||
return (SET_ERROR(ESPIPE));
|
||||
rc = fo_seek(fp, *offp, whence, td);
|
||||
if (rc == 0)
|
||||
*offp = td->td_uretoff.tdu_off;
|
||||
return (SET_ERROR(rc));
|
||||
}
|
||||
|
||||
int
|
||||
zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
|
||||
{
|
||||
struct thread *td;
|
||||
struct stat sb;
|
||||
int rc;
|
||||
|
||||
td = curthread;
|
||||
|
||||
rc = fo_stat(fp, &sb, td->td_ucred, td);
|
||||
if (rc)
|
||||
return (SET_ERROR(rc));
|
||||
zfattr->zfa_size = sb.st_size;
|
||||
zfattr->zfa_mode = sb.st_mode;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static __inline int
|
||||
zfs_vop_fsync(vnode_t *vp)
|
||||
{
|
||||
struct mount *mp;
|
||||
int error;
|
||||
|
||||
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
|
||||
goto drop;
|
||||
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
||||
error = VOP_FSYNC(vp, MNT_WAIT, curthread);
|
||||
VOP_UNLOCK1(vp);
|
||||
vn_finished_write(mp);
|
||||
drop:
|
||||
return (SET_ERROR(error));
|
||||
}
|
||||
|
||||
int
|
||||
zfs_file_fsync(zfs_file_t *fp, int flags)
|
||||
{
|
||||
struct vnode *v;
|
||||
|
||||
if (fp->f_type != DTYPE_VNODE)
|
||||
return (EINVAL);
|
||||
|
||||
v = fp->f_data;
|
||||
return (zfs_vop_fsync(v));
|
||||
}
|
||||
|
||||
int
|
||||
zfs_file_get(int fd, zfs_file_t **fpp)
|
||||
{
|
||||
struct file *fp;
|
||||
|
||||
if (fget(curthread, fd, &cap_no_rights, &fp))
|
||||
return (SET_ERROR(EBADF));
|
||||
|
||||
*fpp = fp;
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_file_put(int fd)
|
||||
{
|
||||
struct file *fp;
|
||||
|
||||
/* No CAP_ rights required, as we're only releasing. */
|
||||
if (fget(curthread, fd, &cap_no_rights, &fp) == 0) {
|
||||
fdrop(fp, curthread);
|
||||
fdrop(fp, curthread);
|
||||
}
|
||||
}
|
||||
|
||||
loff_t
|
||||
zfs_file_off(zfs_file_t *fp)
|
||||
{
|
||||
return (fp->f_offset);
|
||||
}
|
||||
|
||||
void *
|
||||
zfs_file_private(zfs_file_t *fp)
|
||||
{
|
||||
file_t *tmpfp;
|
||||
void *data;
|
||||
int error;
|
||||
|
||||
tmpfp = curthread->td_fpop;
|
||||
curthread->td_fpop = fp;
|
||||
error = devfs_get_cdevpriv(&data);
|
||||
curthread->td_fpop = tmpfp;
|
||||
if (error != 0)
|
||||
return (NULL);
|
||||
return (data);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_file_unlink(const char *fnamep)
|
||||
{
|
||||
enum uio_seg seg = UIO_SYSSPACE;
|
||||
int rc;
|
||||
|
||||
#if __FreeBSD_version >= 1300018
|
||||
rc = kern_funlinkat(curthread, AT_FDCWD, fnamep, FD_NONE, seg, 0, 0);
|
||||
#else
|
||||
#ifdef AT_BENEATH
|
||||
rc = kern_unlinkat(curthread, AT_FDCWD, fnamep, seg, 0, 0);
|
||||
#else
|
||||
rc = kern_unlinkat(curthread, AT_FDCWD, __DECONST(char *, fnamep),
|
||||
seg, 0);
|
||||
#endif
|
||||
#endif
|
||||
return (SET_ERROR(rc));
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/nvpair.h>
|
||||
#ifdef _KERNEL
|
||||
#include <sys/sid.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#endif
|
||||
#include <sys/zfs_fuid.h>
|
||||
|
||||
uint64_t
|
||||
zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
|
||||
cred_t *cr, zfs_fuid_info_t **fuidp)
|
||||
{
|
||||
uid_t id;
|
||||
|
||||
VERIFY(type == ZFS_OWNER || type == ZFS_GROUP);
|
||||
|
||||
id = (type == ZFS_OWNER) ? crgetuid(cr) : crgetgid(cr);
|
||||
|
||||
if (IS_EPHEMERAL(id))
|
||||
return ((type == ZFS_OWNER) ? UID_NOBODY : GID_NOBODY);
|
||||
|
||||
return ((uint64_t)id);
|
||||
}
|
||||
@@ -0,0 +1,194 @@
|
||||
/*
|
||||
* Copyright (c) 2020 iXsystems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/cmn_err.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/vdev_os.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_dir.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/dsl_prop.h>
|
||||
#include <sys/dsl_deleg.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_impl.h>
|
||||
#include <sys/dmu_redact.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/sunddi.h>
|
||||
#include <sys/policy.h>
|
||||
#include <sys/zone.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/pathname.h>
|
||||
#include <sys/sdt.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zfs_ctldir.h>
|
||||
#include <sys/zfs_dir.h>
|
||||
#include <sys/zfs_onexit.h>
|
||||
#include <sys/zvol.h>
|
||||
#include <sys/dsl_scan.h>
|
||||
#include <sys/fm/util.h>
|
||||
#include <sys/dsl_crypt.h>
|
||||
|
||||
#include <sys/dmu_recv.h>
|
||||
#include <sys/dmu_send.h>
|
||||
#include <sys/dmu_recv.h>
|
||||
#include <sys/dsl_destroy.h>
|
||||
#include <sys/dsl_bookmark.h>
|
||||
#include <sys/dsl_userhold.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/zcp.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/vdev_removal.h>
|
||||
#include <sys/vdev_trim.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/vdev_initialize.h>
|
||||
#include <sys/zfs_ioctl_impl.h>
|
||||
|
||||
int
|
||||
zfs_vfs_ref(zfsvfs_t **zfvp)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
if (*zfvp == NULL)
|
||||
return (SET_ERROR(ESRCH));
|
||||
|
||||
error = vfs_busy((*zfvp)->z_vfs, 0);
|
||||
if (error != 0) {
|
||||
*zfvp = NULL;
|
||||
error = SET_ERROR(ESRCH);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_vfs_held(zfsvfs_t *zfsvfs)
|
||||
{
|
||||
return (zfsvfs->z_vfs != NULL);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_vfs_rele(zfsvfs_t *zfsvfs)
|
||||
{
|
||||
vfs_unbusy(zfsvfs->z_vfs);
|
||||
}
|
||||
|
||||
static const zfs_ioc_key_t zfs_keys_nextboot[] = {
|
||||
{"command", DATA_TYPE_STRING, 0},
|
||||
{ ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, 0},
|
||||
{ ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, 0}
|
||||
};
|
||||
|
||||
static int
|
||||
zfs_ioc_jail(zfs_cmd_t *zc)
|
||||
{
|
||||
|
||||
return (zone_dataset_attach(curthread->td_ucred, zc->zc_name,
|
||||
(int)zc->zc_zoneid));
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ioc_unjail(zfs_cmd_t *zc)
|
||||
{
|
||||
|
||||
return (zone_dataset_detach(curthread->td_ucred, zc->zc_name,
|
||||
(int)zc->zc_zoneid));
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
{
|
||||
char name[MAXNAMELEN];
|
||||
spa_t *spa;
|
||||
vdev_t *vd;
|
||||
char *command;
|
||||
uint64_t pool_guid;
|
||||
uint64_t vdev_guid;
|
||||
int error;
|
||||
|
||||
if (nvlist_lookup_uint64(innvl,
|
||||
ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
|
||||
return (EINVAL);
|
||||
if (nvlist_lookup_uint64(innvl,
|
||||
ZPOOL_CONFIG_GUID, &vdev_guid) != 0)
|
||||
return (EINVAL);
|
||||
if (nvlist_lookup_string(innvl,
|
||||
"command", &command) != 0)
|
||||
return (EINVAL);
|
||||
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
spa = spa_by_guid(pool_guid, vdev_guid);
|
||||
if (spa != NULL)
|
||||
strcpy(name, spa_name(spa));
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
if (spa == NULL)
|
||||
return (ENOENT);
|
||||
|
||||
if ((error = spa_open(name, &spa, FTAG)) != 0)
|
||||
return (error);
|
||||
spa_vdev_state_enter(spa, SCL_ALL);
|
||||
vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE);
|
||||
if (vd == NULL) {
|
||||
(void) spa_vdev_state_exit(spa, NULL, ENXIO);
|
||||
spa_close(spa, FTAG);
|
||||
return (ENODEV);
|
||||
}
|
||||
error = vdev_label_write_pad2(vd, command, strlen(command));
|
||||
(void) spa_vdev_state_exit(spa, NULL, 0);
|
||||
txg_wait_synced(spa->spa_dsl_pool, 0);
|
||||
spa_close(spa, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
zfs_ioctl_init_os(void)
|
||||
{
|
||||
zfs_ioctl_register_dataset_nolog(ZFS_IOC_JAIL, zfs_ioc_jail,
|
||||
zfs_secpolicy_config, POOL_CHECK_NONE);
|
||||
zfs_ioctl_register_dataset_nolog(ZFS_IOC_UNJAIL, zfs_ioc_unjail,
|
||||
zfs_secpolicy_config, POOL_CHECK_NONE);
|
||||
zfs_ioctl_register("fbsd_nextboot", ZFS_IOC_NEXTBOOT,
|
||||
zfs_ioc_nextboot, zfs_secpolicy_config, NO_NAME,
|
||||
POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_nextboot, 3);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/sunddi.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zfs_onexit.h>
|
||||
|
||||
static int
|
||||
zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo)
|
||||
{
|
||||
*zo = zfsdev_get_state(minor, ZST_ONEXIT);
|
||||
if (*zo == NULL)
|
||||
return (SET_ERROR(EBADF));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_onexit_fd_hold(int fd, minor_t *minorp)
|
||||
{
|
||||
file_t *fp, *tmpfp;
|
||||
zfs_onexit_t *zo;
|
||||
void *data;
|
||||
int error;
|
||||
|
||||
if ((error = zfs_file_get(fd, &fp)))
|
||||
return (error);
|
||||
|
||||
tmpfp = curthread->td_fpop;
|
||||
curthread->td_fpop = fp;
|
||||
error = devfs_get_cdevpriv(&data);
|
||||
if (error == 0)
|
||||
*minorp = (minor_t)(uintptr_t)data;
|
||||
curthread->td_fpop = tmpfp;
|
||||
if (error != 0)
|
||||
return (SET_ERROR(EBADF));
|
||||
return (zfs_onexit_minor_to_state(*minorp, &zo));
|
||||
}
|
||||
|
||||
void
|
||||
zfs_onexit_fd_rele(int fd)
|
||||
{
|
||||
zfs_file_put(fd);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user