From 3c67d83a8afb391f20bc53d36a0cebea6897b3e2 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Wed, 15 Jun 2016 15:47:05 -0700 Subject: [PATCH 1/4] OpenZFS 4185 - add new cryptographic checksums to ZFS: SHA-512, Skein, Edon-R Reviewed by: George Wilson Reviewed by: Prakash Surya Reviewed by: Saso Kiselkov Reviewed by: Richard Lowe Approved by: Garrett D'Amore Ported by: Tony Hutter OpenZFS-issue: https://www.illumos.org/issues/4185 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/45818ee Porting Notes: This code is ported on top of the Illumos Crypto Framework code: https://github.com/zfsonlinux/zfs/pull/4329/commits/b5e030c8dbb9cd393d313571dee4756fbba8c22d The list of porting changes includes: - Copied module/icp/include/sha2/sha2.h directly from illumos - Removed from module/icp/algs/sha2/sha2.c: #pragma inline(SHA256Init, SHA384Init, SHA512Init) - Added 'ctx' to lib/libzfs/libzfs_sendrecv.c:zio_checksum_SHA256() since it now takes in an extra parameter. - Added CTASSERT() to assert.h from for module/zfs/edonr_zfs.c - Added skein & edonr to libicp/Makefile.am - Added sha512.S. It was generated from sha512-x86_64.pl in Illumos. - Updated ztest.c with new fletcher_4_*() args; used NULL for new CTX argument. - In icp/algs/edonr/edonr_byteorder.h, removed the #if defined(__linux) section to not #include the non-existent endian.h. - In skein_test.c, rename NULL to 0 in "no test vector" array entries to get around a compiler warning. - Fixup test files: - Rename -> , -> , - Remove and define NOTE() as NOP. - Define u_longlong_t - Rename "#!/usr/bin/ksh" -> "#!/bin/ksh -p" - Rename NULL to 0 in "no test vector" array entries to get around a compiler warning. - Remove "for isa in $($ISAINFO); do" stuff - Add/update Makefiles - Add some userspace headers like stdio.h/stdlib.h in place of sys/types.h. - EXPORT_SYMBOL *_Init/*_Update/*_Final... routines in ICP modules. 
- Update scripts/zfs2zol-patch.sed - include in sha2_impl.h - Add sha2.h to include/sys/Makefile.am - Add skein and edonr dirs to icp Makefile - Add new checksums to zpool_get.cfg - Move checksum switch block from zfs_secpolicy_setprop() to zfs_check_settable() - Fix -Wuninitialized error in edonr_byteorder.h on PPC - Fix stack frame size errors on ARM32 - Don't unroll loops in Skein on 32-bit to save stack space - Add memory barriers in sha2.c on 32-bit to save stack space - Add filetest_001_pos.ksh checksum sanity test - Add option to write pseudorandom data in file_write utility --- Makefile.am | 2 +- cmd/ztest/ztest.c | 8 +- configure.ac | 2 + include/sys/Makefile.am | 3 + include/sys/crypto/icp.h | 6 + include/sys/dmu.h | 2 + include/sys/edonr.h | 98 + .../icp/include/sha2 => include/sys}/sha2.h | 39 + include/sys/skein.h | 183 ++ include/sys/spa.h | 9 + include/sys/spa_impl.h | 5 + include/sys/zio.h | 4 + include/sys/zio_checksum.h | 53 +- include/zfeature_common.h | 3 + include/zfs_fletcher.h | 11 +- lib/libicp/Makefile.am | 9 +- lib/libspl/include/assert.h | 8 + lib/libzfs/libzfs_dataset.c | 6 + lib/libzfs/libzfs_sendrecv.c | 4 +- lib/libzpool/Makefile.am | 2 + man/man5/zpool-features.5 | 108 +- man/man8/zfs.8 | 15 +- module/icp/Makefile.in | 10 + module/icp/algs/edonr/edonr.c | 751 ++++++ module/icp/algs/edonr/edonr_byteorder.h | 216 ++ module/icp/algs/sha2/sha2.c | 477 +++- module/icp/algs/skein/THIRDPARTYLICENSE | 3 + .../icp/algs/skein/THIRDPARTYLICENSE.descrip | 1 + module/icp/algs/skein/skein.c | 921 ++++++++ module/icp/algs/skein/skein_block.c | 793 +++++++ module/icp/algs/skein/skein_impl.h | 289 +++ module/icp/algs/skein/skein_iv.c | 185 ++ module/icp/algs/skein/skein_port.h | 128 + module/icp/asm-x86_64/sha2/sha256_impl.S | 8 +- module/icp/asm-x86_64/sha2/sha512_impl.S | 2083 +++++++++++++++++ module/icp/illumos-crypto.c | 4 + module/icp/include/sha2/sha2_impl.h | 2 + module/icp/io/edonr_mod.c | 62 + module/icp/io/sha2_mod.c | 2 +- 
module/icp/io/skein_mod.c | 721 ++++++ module/zcommon/zfs_fletcher.c | 21 +- module/zcommon/zfs_prop.c | 20 +- module/zfs/Makefile.in | 2 + module/zfs/arc.c | 4 +- module/zfs/dbuf.c | 3 +- module/zfs/ddt.c | 5 +- module/zfs/dmu.c | 24 +- module/zfs/dmu_send.c | 3 +- module/zfs/dsl_dataset.c | 8 + module/zfs/edonr_zfs.c | 103 + module/zfs/sha256.c | 146 +- module/zfs/skein_zfs.c | 91 + module/zfs/spa.c | 34 + module/zfs/spa_misc.c | 7 +- module/zfs/vdev_raidz.c | 7 + module/zfs/zfeature_common.c | 12 + module/zfs/zfs_ioctl.c | 47 +- module/zfs/zio.c | 37 +- module/zfs/zio_checksum.c | 163 +- scripts/zfs2zol-patch.sed | 19 + tests/runfiles/linux.run | 3 + tests/zfs-tests/cmd/file_write/file_write.c | 46 +- tests/zfs-tests/include/libtest.shlib | 14 + tests/zfs-tests/include/properties.shlib | 3 +- tests/zfs-tests/tests/functional/Makefile.am | 1 + .../tests/functional/checksum/.gitignore | 4 + .../tests/functional/checksum/Makefile.am | 26 + .../tests/functional/checksum/cleanup.ksh | 30 + .../tests/functional/checksum/edonr_test.c | 219 ++ .../functional/checksum/filetest_001_pos.ksh | 125 + .../functional/checksum/run_edonr_test.ksh | 30 + .../functional/checksum/run_sha2_test.ksh | 30 + .../functional/checksum/run_skein_test.ksh | 30 + .../tests/functional/checksum/setup.ksh | 31 + .../tests/functional/checksum/sha2_test.c | 265 +++ .../tests/functional/checksum/skein_test.c | 342 +++ .../cli_root/zfs_set/checksum_001_pos.ksh | 2 +- .../cli_root/zpool_get/zpool_get.cfg | 6 +- 78 files changed, 8996 insertions(+), 203 deletions(-) create mode 100644 include/sys/edonr.h rename {module/icp/include/sha2 => include/sys}/sha2.h (70%) create mode 100644 include/sys/skein.h create mode 100644 module/icp/algs/edonr/edonr.c create mode 100644 module/icp/algs/edonr/edonr_byteorder.h create mode 100644 module/icp/algs/skein/THIRDPARTYLICENSE create mode 100644 module/icp/algs/skein/THIRDPARTYLICENSE.descrip create mode 100644 module/icp/algs/skein/skein.c create mode 100644 
module/icp/algs/skein/skein_block.c create mode 100644 module/icp/algs/skein/skein_impl.h create mode 100644 module/icp/algs/skein/skein_iv.c create mode 100644 module/icp/algs/skein/skein_port.h create mode 100644 module/icp/asm-x86_64/sha2/sha512_impl.S create mode 100644 module/icp/io/edonr_mod.c create mode 100644 module/icp/io/skein_mod.c create mode 100644 module/zfs/edonr_zfs.c create mode 100644 module/zfs/skein_zfs.c create mode 100644 tests/zfs-tests/tests/functional/checksum/.gitignore create mode 100644 tests/zfs-tests/tests/functional/checksum/Makefile.am create mode 100755 tests/zfs-tests/tests/functional/checksum/cleanup.ksh create mode 100644 tests/zfs-tests/tests/functional/checksum/edonr_test.c create mode 100755 tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh create mode 100755 tests/zfs-tests/tests/functional/checksum/run_edonr_test.ksh create mode 100755 tests/zfs-tests/tests/functional/checksum/run_sha2_test.ksh create mode 100755 tests/zfs-tests/tests/functional/checksum/run_skein_test.ksh create mode 100755 tests/zfs-tests/tests/functional/checksum/setup.ksh create mode 100644 tests/zfs-tests/tests/functional/checksum/sha2_test.c create mode 100644 tests/zfs-tests/tests/functional/checksum/skein_test.c diff --git a/Makefile.am b/Makefile.am index fe4285b3f..abc98e4ed 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,7 +6,7 @@ include config/tgz.am SUBDIRS = include rpm if CONFIG_USER -SUBDIRS += udev etc man scripts tests lib cmd contrib +SUBDIRS += udev etc man scripts lib tests cmd contrib endif if CONFIG_KERNEL SUBDIRS += module diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 1b77b6cee..912a7f70e 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -5654,16 +5654,16 @@ ztest_fletcher(ztest_ds_t *zd, uint64_t id) *ptr = ztest_random(UINT_MAX); VERIFY0(fletcher_4_impl_set("scalar")); - fletcher_4_native(buf, size, &zc_ref); - fletcher_4_byteswap(buf, size, &zc_ref_byteswap); + fletcher_4_native(buf, size, NULL, 
&zc_ref); + fletcher_4_byteswap(buf, size, NULL, &zc_ref_byteswap); VERIFY0(fletcher_4_impl_set("cycle")); while (run_count-- > 0) { zio_cksum_t zc; zio_cksum_t zc_byteswap; - fletcher_4_byteswap(buf, size, &zc_byteswap); - fletcher_4_native(buf, size, &zc); + fletcher_4_byteswap(buf, size, NULL, &zc_byteswap); + fletcher_4_native(buf, size, NULL, &zc); VERIFY0(bcmp(&zc, &zc_ref, sizeof (zc))); VERIFY0(bcmp(&zc_byteswap, &zc_ref_byteswap, diff --git a/configure.ac b/configure.ac index c7685550d..edcf29958 100644 --- a/configure.ac +++ b/configure.ac @@ -49,6 +49,7 @@ AC_PROG_INSTALL AC_PROG_CC AC_PROG_LIBTOOL AM_PROG_AS +AM_PROG_CC_C_O ZFS_AC_LICENSE ZFS_AC_PACKAGE @@ -178,6 +179,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/cache/Makefile tests/zfs-tests/tests/functional/cachefile/Makefile tests/zfs-tests/tests/functional/casenorm/Makefile + tests/zfs-tests/tests/functional/checksum/Makefile tests/zfs-tests/tests/functional/clean_mirror/Makefile tests/zfs-tests/tests/functional/cli_root/Makefile tests/zfs-tests/tests/functional/cli_root/zdb/Makefile diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am index 40cd0597c..96d77c7b3 100644 --- a/include/sys/Makefile.am +++ b/include/sys/Makefile.am @@ -31,6 +31,7 @@ COMMON_H = \ $(top_srcdir)/include/sys/dsl_scan.h \ $(top_srcdir)/include/sys/dsl_synctask.h \ $(top_srcdir)/include/sys/dsl_userhold.h \ + $(top_srcdir)/include/sys/edonr.h \ $(top_srcdir)/include/sys/efi_partition.h \ $(top_srcdir)/include/sys/metaslab.h \ $(top_srcdir)/include/sys/metaslab_impl.h \ @@ -46,6 +47,8 @@ COMMON_H = \ $(top_srcdir)/include/sys/sa.h \ $(top_srcdir)/include/sys/sa_impl.h \ $(top_srcdir)/include/sys/sdt.h \ + $(top_srcdir)/include/sys/sha2.h \ + $(top_srcdir)/include/sys/skein.h \ $(top_srcdir)/include/sys/spa_boot.h \ $(top_srcdir)/include/sys/space_map.h \ $(top_srcdir)/include/sys/space_reftree.h \ diff --git a/include/sys/crypto/icp.h b/include/sys/crypto/icp.h index c7bb78e83..d8948e022 100644 --- 
a/include/sys/crypto/icp.h +++ b/include/sys/crypto/icp.h @@ -29,12 +29,18 @@ int aes_mod_init(void); int aes_mod_fini(void); +int edonr_mod_init(void); +int edonr_mod_fini(void); + int sha1_mod_init(void); int sha1_mod_fini(void); int sha2_mod_init(void); int sha2_mod_fini(void); +int skein_mod_init(void); +int skein_mod_fini(void); + int icp_init(void); void icp_fini(void); diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 4efab7c72..b67acb52c 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -25,6 +25,7 @@ * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright 2014 HybridCluster. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + * Copyright 2013 Saso Kiselkov. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -317,6 +318,7 @@ typedef struct dmu_buf { #define DMU_POOL_FREE_BPOBJ "free_bpobj" #define DMU_POOL_BPTREE_OBJ "bptree_obj" #define DMU_POOL_EMPTY_BPOBJ "empty_bpobj" +#define DMU_POOL_CHECKSUM_SALT "org.illumos:checksum_salt" #define DMU_POOL_VDEV_ZAP_MAP "com.delphix:vdev_zap_map" /* diff --git a/include/sys/edonr.h b/include/sys/edonr.h new file mode 100644 index 000000000..79b7cd8c7 --- /dev/null +++ b/include/sys/edonr.h @@ -0,0 +1,98 @@ +/* + * IDI,NTNU + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (C) 2009, 2010, Jorn Amundsen + * + * Tweaked Edon-R implementation for SUPERCOP, based on NIST API. + * + * $Id: edonr.h 517 2013-02-17 20:34:39Z joern $ + */ +/* + * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved + */ + +#ifndef _SYS_EDONR_H_ +#define _SYS_EDONR_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL +#include +#else +#include /* uint32_t... */ +#include /* size_t ... */ +#endif + +/* + * EdonR allows to call EdonRUpdate() consecutively only if the total length + * of stored unprocessed data and the new supplied data is less than or equal + * to the BLOCK_SIZE on which the compression functions operates. + * Otherwise an assertion failure is invoked. + */ + +/* Specific algorithm definitions */ +#define EdonR224_DIGEST_SIZE 28 +#define EdonR224_BLOCK_SIZE 64 +#define EdonR256_DIGEST_SIZE 32 +#define EdonR256_BLOCK_SIZE 64 +#define EdonR384_DIGEST_SIZE 48 +#define EdonR384_BLOCK_SIZE 128 +#define EdonR512_DIGEST_SIZE 64 +#define EdonR512_BLOCK_SIZE 128 + +#define EdonR256_BLOCK_BITSIZE 512 +#define EdonR512_BLOCK_BITSIZE 1024 + +typedef struct { + uint32_t DoublePipe[16]; + uint8_t LastPart[EdonR256_BLOCK_SIZE * 2]; +} EdonRData256; +typedef struct { + uint64_t DoublePipe[16]; + uint8_t LastPart[EdonR512_BLOCK_SIZE * 2]; +} EdonRData512; + +typedef struct { + size_t hashbitlen; + + /* + algorithm specific parameters */ + int unprocessed_bits; + uint64_t bits_processed; + union { + EdonRData256 p256[1]; + EdonRData512 p512[1]; + } pipe[1]; +} EdonRState; + +void EdonRInit(EdonRState *state, size_t hashbitlen); +void EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen); +void EdonRFinal(EdonRState *state, uint8_t *hashval); +void EdonRHash(size_t hashbitlen, 
const uint8_t *data, size_t databitlen, + uint8_t *hashval); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_EDONR_H_ */ diff --git a/module/icp/include/sha2/sha2.h b/include/sys/sha2.h similarity index 70% rename from module/icp/include/sha2/sha2.h rename to include/sys/sha2.h index 8e53987a7..9039835f1 100644 --- a/module/icp/include/sha2/sha2.h +++ b/include/sys/sha2.h @@ -27,7 +27,11 @@ #ifndef _SYS_SHA2_H #define _SYS_SHA2_H +#ifdef _KERNEL #include /* for uint_* */ +#else +#include +#endif #ifdef __cplusplus extern "C" { @@ -37,12 +41,27 @@ extern "C" { #define SHA2_HMAC_MAX_KEY_LEN INT_MAX /* SHA2-HMAC max key length in bytes */ #define SHA256_DIGEST_LENGTH 32 /* SHA256 digest length in bytes */ +#define SHA384_DIGEST_LENGTH 48 /* SHA384 digest length in bytes */ +#define SHA512_DIGEST_LENGTH 64 /* SHA512 digest length in bytes */ + +/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */ +#define SHA512_224_DIGEST_LENGTH 28 /* SHA512/224 digest length */ +#define SHA512_256_DIGEST_LENGTH 32 /* SHA512/256 digest length */ #define SHA256_HMAC_BLOCK_SIZE 64 /* SHA256-HMAC block size */ +#define SHA512_HMAC_BLOCK_SIZE 128 /* SHA512-HMAC block size */ #define SHA256 0 #define SHA256_HMAC 1 #define SHA256_HMAC_GEN 2 +#define SHA384 3 +#define SHA384_HMAC 4 +#define SHA384_HMAC_GEN 5 +#define SHA512 6 +#define SHA512_HMAC 7 +#define SHA512_HMAC_GEN 8 +#define SHA512_224 9 +#define SHA512_256 10 /* * SHA2 context. 
@@ -87,6 +106,18 @@ extern void SHA256Update(SHA256_CTX *, const void *, size_t); extern void SHA256Final(void *, SHA256_CTX *); +extern void SHA384Init(SHA384_CTX *); + +extern void SHA384Update(SHA384_CTX *, const void *, size_t); + +extern void SHA384Final(void *, SHA384_CTX *); + +extern void SHA512Init(SHA512_CTX *); + +extern void SHA512Update(SHA512_CTX *, const void *, size_t); + +extern void SHA512Final(void *, SHA512_CTX *); + #ifdef _SHA2_IMPL /* * The following types/functions are all private to the implementation @@ -105,6 +136,14 @@ typedef enum sha2_mech_type { SHA256_MECH_INFO_TYPE, /* SUN_CKM_SHA256 */ SHA256_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC */ SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */ + SHA384_MECH_INFO_TYPE, /* SUN_CKM_SHA384 */ + SHA384_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC */ + SHA384_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC_GENERAL */ + SHA512_MECH_INFO_TYPE, /* SUN_CKM_SHA512 */ + SHA512_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC */ + SHA512_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC_GENERAL */ + SHA512_224_MECH_INFO_TYPE, /* SUN_CKM_SHA512_224 */ + SHA512_256_MECH_INFO_TYPE /* SUN_CKM_SHA512_256 */ } sha2_mech_type_t; #endif /* _SHA2_IMPL */ diff --git a/include/sys/skein.h b/include/sys/skein.h new file mode 100644 index 000000000..2f649d6b2 --- /dev/null +++ b/include/sys/skein.h @@ -0,0 +1,183 @@ +/* + * Interface declarations for Skein hashing. + * Source code author: Doug Whiting, 2008. + * This algorithm and source code is released to the public domain. + * + * The following compile-time switches may be defined to control some + * tradeoffs between speed, code size, error checking, and security. + * + * The "default" note explains what happens when the switch is not defined. + * + * SKEIN_DEBUG -- make callouts from inside Skein code + * to examine/display intermediate values. 
+ * [default: no callouts (no overhead)] + * + * SKEIN_ERR_CHECK -- how error checking is handled inside Skein + * code. If not defined, most error checking + * is disabled (for performance). Otherwise, + * the switch value is interpreted as: + * 0: use assert() to flag errors + * 1: return SKEIN_FAIL to flag errors + */ +/* Copyright 2013 Doug Whiting. This code is released to the public domain. */ +#ifndef _SYS_SKEIN_H_ +#define _SYS_SKEIN_H_ + +#ifdef _KERNEL +#include /* get size_t definition */ +#else +#include +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + SKEIN_SUCCESS = 0, /* return codes from Skein calls */ + SKEIN_FAIL = 1, + SKEIN_BAD_HASHLEN = 2 +}; + +#define SKEIN_MODIFIER_WORDS (2) /* number of modifier (tweak) words */ + +#define SKEIN_256_STATE_WORDS (4) +#define SKEIN_512_STATE_WORDS (8) +#define SKEIN1024_STATE_WORDS (16) +#define SKEIN_MAX_STATE_WORDS (16) + +#define SKEIN_256_STATE_BYTES (8 * SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BYTES (8 * SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BYTES (8 * SKEIN1024_STATE_WORDS) + +#define SKEIN_256_STATE_BITS (64 * SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BITS (64 * SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BITS (64 * SKEIN1024_STATE_WORDS) + +#define SKEIN_256_BLOCK_BYTES (8 * SKEIN_256_STATE_WORDS) +#define SKEIN_512_BLOCK_BYTES (8 * SKEIN_512_STATE_WORDS) +#define SKEIN1024_BLOCK_BYTES (8 * SKEIN1024_STATE_WORDS) + +typedef struct { + size_t hashBitLen; /* size of hash result, in bits */ + size_t bCnt; /* current byte count in buffer b[] */ + /* tweak words: T[0]=byte cnt, T[1]=flags */ + uint64_t T[SKEIN_MODIFIER_WORDS]; +} Skein_Ctxt_Hdr_t; + +typedef struct { /* 256-bit Skein hash context structure */ + Skein_Ctxt_Hdr_t h; /* common header context variables */ + uint64_t X[SKEIN_256_STATE_WORDS]; /* chaining variables */ + /* partial block buffer (8-byte aligned) */ + uint8_t b[SKEIN_256_BLOCK_BYTES]; +} Skein_256_Ctxt_t; + +typedef struct { /* 
512-bit Skein hash context structure */ + Skein_Ctxt_Hdr_t h; /* common header context variables */ + uint64_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */ + /* partial block buffer (8-byte aligned) */ + uint8_t b[SKEIN_512_BLOCK_BYTES]; +} Skein_512_Ctxt_t; + +typedef struct { /* 1024-bit Skein hash context structure */ + Skein_Ctxt_Hdr_t h; /* common header context variables */ + uint64_t X[SKEIN1024_STATE_WORDS]; /* chaining variables */ + /* partial block buffer (8-byte aligned) */ + uint8_t b[SKEIN1024_BLOCK_BYTES]; +} Skein1024_Ctxt_t; + +/* Skein APIs for (incremental) "straight hashing" */ +int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen); +int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen); +int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen); + +int Skein_256_Update(Skein_256_Ctxt_t *ctx, const uint8_t *msg, + size_t msgByteCnt); +int Skein_512_Update(Skein_512_Ctxt_t *ctx, const uint8_t *msg, + size_t msgByteCnt); +int Skein1024_Update(Skein1024_Ctxt_t *ctx, const uint8_t *msg, + size_t msgByteCnt); + +int Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal); +int Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal); +int Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal); + +/* + * Skein APIs for "extended" initialization: MAC keys, tree hashing. + * After an InitExt() call, just use Update/Final calls as with Init(). + * + * Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes. + * When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL, + * the results of InitExt() are identical to calling Init(). + * The function Init() may be called once to "precompute" the IV for + * a given hashBitLen value, then by saving a copy of the context + * the IV computation may be avoided in later calls. + * Similarly, the function InitExt() may be called once per MAC key + * to precompute the MAC IV, then a copy of the context saved and + * reused for each new MAC computation. 
+ */ +int Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, + uint64_t treeInfo, const uint8_t *key, size_t keyBytes); +int Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, + uint64_t treeInfo, const uint8_t *key, size_t keyBytes); +int Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, + uint64_t treeInfo, const uint8_t *key, size_t keyBytes); + +/* + * Skein APIs for MAC and tree hash: + * Final_Pad: pad, do final block, but no OUTPUT type + * Output: do just the output stage + */ +int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, uint8_t *hashVal); +int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, uint8_t *hashVal); +int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, uint8_t *hashVal); + +#ifndef SKEIN_TREE_HASH +#define SKEIN_TREE_HASH (1) +#endif +#if SKEIN_TREE_HASH +int Skein_256_Output(Skein_256_Ctxt_t *ctx, uint8_t *hashVal); +int Skein_512_Output(Skein_512_Ctxt_t *ctx, uint8_t *hashVal); +int Skein1024_Output(Skein1024_Ctxt_t *ctx, uint8_t *hashVal); +#endif + +/* + * When you initialize a Skein KCF hashing method you can pass this param + * structure in cm_param to fine-tune the algorithm's defaults. 
+ */ +typedef struct skein_param { + size_t sp_digest_bitlen; /* length of digest in bits */ +} skein_param_t; + +/* Module definitions */ +#ifdef SKEIN_MODULE_IMPL +#define CKM_SKEIN_256 "CKM_SKEIN_256" +#define CKM_SKEIN_512 "CKM_SKEIN_512" +#define CKM_SKEIN1024 "CKM_SKEIN1024" +#define CKM_SKEIN_256_MAC "CKM_SKEIN_256_MAC" +#define CKM_SKEIN_512_MAC "CKM_SKEIN_512_MAC" +#define CKM_SKEIN1024_MAC "CKM_SKEIN1024_MAC" + +typedef enum skein_mech_type { + SKEIN_256_MECH_INFO_TYPE, + SKEIN_512_MECH_INFO_TYPE, + SKEIN1024_MECH_INFO_TYPE, + SKEIN_256_MAC_MECH_INFO_TYPE, + SKEIN_512_MAC_MECH_INFO_TYPE, + SKEIN1024_MAC_MECH_INFO_TYPE +} skein_mech_type_t; + +#define VALID_SKEIN_DIGEST_MECH(__mech) \ + ((int)(__mech) >= SKEIN_256_MECH_INFO_TYPE && \ + (__mech) <= SKEIN1024_MECH_INFO_TYPE) +#define VALID_SKEIN_MAC_MECH(__mech) \ + ((int)(__mech) >= SKEIN_256_MAC_MECH_INFO_TYPE && \ + (__mech) <= SKEIN1024_MAC_MECH_INFO_TYPE) +#endif /* SKEIN_MODULE_IMPL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SKEIN_H_ */ diff --git a/include/sys/spa.h b/include/sys/spa.h index 0c71cca68..3d0b962e6 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -23,6 +23,7 @@ * Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + * Copyright 2013 Saso Kiselkov. All rights reserved. */ #ifndef _SYS_SPA_H @@ -147,6 +148,14 @@ typedef struct dva { } dva_t; +/* + * Some checksums/hashes need a 256-bit initialization salt. This salt is kept + * secret and is suitable for use in MAC algorithms as the key. + */ +typedef struct zio_cksum_salt { + uint8_t zcs_bytes[32]; +} zio_cksum_salt_t; + /* * Each block is described by its DVAs, time of birth, checksum, etc. 
* The word-by-word, bit-by-bit layout of the blkptr is as follows: diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 59cb44de2..7b9e1ee0c 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -23,6 +23,7 @@ * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. */ @@ -172,6 +173,10 @@ struct spa { uint64_t spa_syncing_txg; /* txg currently syncing */ bpobj_t spa_deferred_bpobj; /* deferred-free bplist */ bplist_t spa_free_bplist[TXG_SIZE]; /* bplist of stuff to free */ + zio_cksum_salt_t spa_cksum_salt; /* secret salt for cksum */ + /* checksum context templates */ + kmutex_t spa_cksum_tmpls_lock; + void *spa_cksum_tmpls[ZIO_CHECKSUM_FUNCTIONS]; uberblock_t spa_ubsync; /* last synced uberblock */ uberblock_t spa_uberblock; /* current uberblock */ boolean_t spa_extreme_rewind; /* rewind past deferred frees */ diff --git a/include/sys/zio.h b/include/sys/zio.h index 51b51fbec..22001559c 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -80,6 +80,10 @@ enum zio_checksum { ZIO_CHECKSUM_FLETCHER_4, ZIO_CHECKSUM_SHA256, ZIO_CHECKSUM_ZILOG2, + ZIO_CHECKSUM_NOPARITY, + ZIO_CHECKSUM_SHA512, + ZIO_CHECKSUM_SKEIN, + ZIO_CHECKSUM_EDONR, ZIO_CHECKSUM_FUNCTIONS }; diff --git a/include/sys/zio_checksum.h b/include/sys/zio_checksum.h index 04573ba54..b4c2c8c08 100644 --- a/include/sys/zio_checksum.h +++ b/include/sys/zio_checksum.h @@ -20,13 +20,15 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014 by Delphix. All rights reserved. + * Copyright (c) 2014, 2015 by Delphix. All rights reserved. + * Copyright Saso Kiselkov 2013, All rights reserved. 
*/ #ifndef _SYS_ZIO_CHECKSUM_H #define _SYS_ZIO_CHECKSUM_H #include +#include #ifdef __cplusplus extern "C" { @@ -35,17 +37,36 @@ extern "C" { /* * Signature for checksum functions. */ -typedef void zio_checksum_func_t(const void *, uint64_t, zio_cksum_t *); +typedef void zio_checksum_func_t(const void *, uint64_t, const void *, + zio_cksum_t *); +typedef void zio_checksum_t(const void *data, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp); +typedef void *zio_checksum_tmpl_init_t(const zio_cksum_salt_t *salt); +typedef void zio_checksum_tmpl_free_t(void *ctx_template); + +typedef enum zio_checksum_flags { + /* Strong enough for metadata? */ + ZCHECKSUM_FLAG_METADATA = (1 << 1), + /* ZIO embedded checksum */ + ZCHECKSUM_FLAG_EMBEDDED = (1 << 2), + /* Strong enough for dedup (without verification)? */ + ZCHECKSUM_FLAG_DEDUP = (1 << 3), + /* Uses salt value */ + ZCHECKSUM_FLAG_SALTED = (1 << 4), + /* Strong enough for nopwrite? */ + ZCHECKSUM_FLAG_NOPWRITE = (1 << 5) +} zio_checksum_flags_t; /* * Information about each checksum function. */ typedef const struct zio_checksum_info { - zio_checksum_func_t *ci_func[2]; /* checksum function per byteorder */ - int ci_correctable; /* number of correctable bits */ - int ci_eck; /* uses zio embedded checksum? */ - boolean_t ci_dedup; /* strong enough for dedup? */ - char *ci_name; /* descriptive name */ + /* checksum function for each byteorder */ + zio_checksum_t *ci_func[2]; + zio_checksum_tmpl_init_t *ci_tmpl_init; + zio_checksum_tmpl_free_t *ci_tmpl_free; + zio_checksum_flags_t ci_flags; + char *ci_name; /* descriptive name */ } zio_checksum_info_t; typedef struct zio_bad_cksum { @@ -62,7 +83,21 @@ extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS]; /* * Checksum routines. 
*/ -extern zio_checksum_func_t zio_checksum_SHA256; +extern zio_checksum_t zio_checksum_SHA256; +extern zio_checksum_t zio_checksum_SHA512_native; +extern zio_checksum_t zio_checksum_SHA512_byteswap; + +/* Skein */ +extern zio_checksum_t zio_checksum_skein_native; +extern zio_checksum_t zio_checksum_skein_byteswap; +extern zio_checksum_tmpl_init_t zio_checksum_skein_tmpl_init; +extern zio_checksum_tmpl_free_t zio_checksum_skein_tmpl_free; + +/* Edon-R */ +extern zio_checksum_t zio_checksum_edonr_native; +extern zio_checksum_t zio_checksum_edonr_byteswap; +extern zio_checksum_tmpl_init_t zio_checksum_edonr_tmpl_init; +extern zio_checksum_tmpl_free_t zio_checksum_edonr_tmpl_free; extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum, void *, uint64_t, uint64_t, zio_bad_cksum_t *); @@ -72,6 +107,8 @@ extern int zio_checksum_error_impl(spa_t *, blkptr_t *, enum zio_checksum, void *, uint64_t, uint64_t, zio_bad_cksum_t *); extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out); extern enum zio_checksum spa_dedup_checksum(spa_t *spa); +extern void zio_checksum_templates_free(spa_t *spa); +extern spa_feature_t zio_checksum_to_feature(enum zio_checksum cksum); #ifdef __cplusplus } diff --git a/include/zfeature_common.h b/include/zfeature_common.h index 41cfdf807..f05480181 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -51,6 +51,9 @@ typedef enum spa_feature { SPA_FEATURE_FS_SS_LIMIT, SPA_FEATURE_LARGE_BLOCKS, SPA_FEATURE_LARGE_DNODE, + SPA_FEATURE_SHA512, + SPA_FEATURE_SKEIN, + SPA_FEATURE_EDONR, SPA_FEATURES } spa_feature_t; diff --git a/include/zfs_fletcher.h b/include/zfs_fletcher.h index f0cfbd573..83f92a096 100644 --- a/include/zfs_fletcher.h +++ b/include/zfs_fletcher.h @@ -22,6 +22,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2013 Saso Kiselkov. All rights reserved. 
+ */ #ifndef _ZFS_FLETCHER_H #define _ZFS_FLETCHER_H @@ -45,11 +48,11 @@ extern "C" { * checksum method is added. This method will ignore last (size % 4) bytes of * the data buffer. */ -void fletcher_2_native(const void *, uint64_t, zio_cksum_t *); -void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *); -void fletcher_4_native(const void *, uint64_t, zio_cksum_t *); +void fletcher_2_native(const void *, uint64_t, const void *, zio_cksum_t *); +void fletcher_2_byteswap(const void *, uint64_t, const void *, zio_cksum_t *); +void fletcher_4_native(const void *, uint64_t, const void *, zio_cksum_t *); void fletcher_4_native_varsize(const void *, uint64_t, zio_cksum_t *); -void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *); +void fletcher_4_byteswap(const void *, uint64_t, const void *, zio_cksum_t *); void fletcher_4_incremental_native(const void *, uint64_t, zio_cksum_t *); void fletcher_4_incremental_byteswap(const void *, uint64_t, diff --git a/lib/libicp/Makefile.am b/lib/libicp/Makefile.am index 41457fd52..0852a583a 100644 --- a/lib/libicp/Makefile.am +++ b/lib/libicp/Makefile.am @@ -20,7 +20,8 @@ ASM_SOURCES_AS = \ asm-x86_64/aes/aes_intel.S \ asm-x86_64/modes/gcm_intel.S \ asm-x86_64/sha1/sha1-x86_64.S \ - asm-x86_64/sha2/sha256_impl.S + asm-x86_64/sha2/sha256_impl.S \ + asm-x86_64/sha2/sha512_impl.S endif if TARGET_ASM_I386 @@ -46,6 +47,7 @@ KERNEL_C = \ api/kcf_mac.c \ algs/aes/aes_impl.c \ algs/aes/aes_modes.c \ + algs/edonr/edonr.c \ algs/modes/modes.c \ algs/modes/cbc.c \ algs/modes/gcm.c \ @@ -54,10 +56,15 @@ KERNEL_C = \ algs/modes/ecb.c \ algs/sha1/sha1.c \ algs/sha2/sha2.c \ + algs/skein/skein.c \ + algs/skein/skein_block.c \ + algs/skein/skein_iv.c \ illumos-crypto.c \ io/aes.c \ + io/edonr_mod.c \ io/sha1_mod.c \ io/sha2_mod.c \ + io/skein_mod.c \ os/modhash.c \ os/modconf.c \ core/kcf_sched.c \ diff --git a/lib/libspl/include/assert.h b/lib/libspl/include/assert.h index 6226872e5..bd89ad94f 100644 --- 
a/lib/libspl/include/assert.h +++ b/lib/libspl/include/assert.h @@ -73,6 +73,14 @@ do { \ #undef assert #endif +/* Compile time assert */ +#define CTASSERT_GLOBAL(x) _CTASSERT(x, __LINE__) +#define CTASSERT(x) { _CTASSERT(x, __LINE__); } +#define _CTASSERT(x, y) __CTASSERT(x, y) +#define __CTASSERT(x, y) \ + typedef char __attribute__((unused)) \ + __compile_time_assertion__ ## y[(x) ? 1 : -1] + #ifdef NDEBUG #define ASSERT3S(x, y, z) ((void)0) #define ASSERT3U(x, y, z) ((void)0) diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 6b09cb6da..5ecf96985 100755 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -1477,6 +1477,12 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err, "property setting is not allowed on " "bootable datasets")); (void) zfs_error(hdl, EZFS_NOTSUP, errbuf); + } else if (prop == ZFS_PROP_CHECKSUM || + prop == ZFS_PROP_DEDUP) { + (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property setting is not allowed on " + "root pools")); + (void) zfs_error(hdl, EZFS_NOTSUP, errbuf); } else { (void) zfs_standard_error(hdl, err, errbuf); } diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index e409899a2..f70e34107 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -61,6 +61,7 @@ #include #include #include +#include /* in libzfs_dataset.c */ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); @@ -365,10 +366,11 @@ cksummer(void *arg) if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, zero_cksum) || !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { + SHA256_CTX ctx; zio_cksum_t tmpsha256; zio_checksum_SHA256(buf, - payload_size, &tmpsha256); + payload_size, &ctx, &tmpsha256); drrw->drr_key.ddk_cksum.zc_word[0] = BE_64(tmpsha256.zc_word[0]); diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 351ddfeac..c2f5a50b1 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -61,6 +61,7 @@ 
KERNEL_C = \ dsl_synctask.c \ dsl_destroy.c \ dsl_userhold.c \ + edonr_zfs.c \ fm.c \ gzip.c \ lzjb.c \ @@ -73,6 +74,7 @@ KERNEL_C = \ rrwlock.c \ sa.c \ sha256.c \ + skein_zfs.c \ spa.c \ spa_boot.c \ spa_config.c \ diff --git a/man/man5/zpool-features.5 b/man/man5/zpool-features.5 index fa04d6e81..dcfb30d18 100644 --- a/man/man5/zpool-features.5 +++ b/man/man5/zpool-features.5 @@ -1,5 +1,5 @@ '\" te -.\" Copyright (c) 2013 by Delphix. All rights reserved. +.\" Copyright (c) 2012, 2015 by Delphix. All rights reserved. .\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. .\" Copyright (c) 2014, Joyent, Inc. All rights reserved. .\" The contents of this file are subject to the terms of the Common Development @@ -457,5 +457,114 @@ allow more data to be stored in the bonus buffer, thus potentially improving performance by avoiding the use of spill blocks. .RE +.sp +.ne 2 +.na +\fB\fBsha512\fR\fR +.ad +.RS 4n +.TS +l l . +GUID org.illumos:sha512 +READ\-ONLY COMPATIBLE no +DEPENDENCIES none +.TE + +This feature enables the use of the SHA-512/256 truncated hash algorithm +(FIPS 180-4) for checksum and dedup. The native 64-bit arithmetic of +SHA-512 provides an approximate 50% performance boost over SHA-256 on +64-bit hardware and is thus a good minimum-change replacement candidate +for systems where hash performance is important, but these systems +cannot for whatever reason utilize the faster \fBskein\fR and +\fBedonr\fR algorithms. + +When the \fBsha512\fR feature is set to \fBenabled\fR, the administrator +can turn on the \fBsha512\fR checksum on any dataset using the +\fBzfs set checksum=sha512\fR(1M) command. This feature becomes +\fBactive\fR once a \fBchecksum\fR property has been set to \fBsha512\fR, +and will return to being \fBenabled\fR once all filesystems that have +ever had their checksum set to \fBsha512\fR are destroyed. + +Booting off of pools utilizing SHA-512/256 is supported (provided that +the updated GRUB stage2 module is installed).
+ +.RE + +.sp +.ne 2 +.na +\fB\fBskein\fR\fR +.ad +.RS 4n +.TS +l l . +GUID org.illumos:skein +READ\-ONLY COMPATIBLE no +DEPENDENCIES none +.TE + +This feature enables the use of the Skein hash algorithm for checksum +and dedup. Skein is a high-performance secure hash algorithm that was a +finalist in the NIST SHA-3 competition. It provides a very high security +margin and high performance on 64-bit hardware (80% faster than +SHA-256). This implementation also utilizes the new salted checksumming +functionality in ZFS, which means that the checksum is pre-seeded with a +secret 256-bit random key (stored on the pool) before being fed the data +block to be checksummed. Thus the produced checksums are unique to a +given pool, preventing hash collision attacks on systems with dedup. + +When the \fBskein\fR feature is set to \fBenabled\fR, the administrator +can turn on the \fBskein\fR checksum on any dataset using the +\fBzfs set checksum=skein\fR(1M) command. This feature becomes +\fBactive\fR once a \fBchecksum\fR property has been set to \fBskein\fR, +and will return to being \fBenabled\fR once all filesystems that have +ever had their checksum set to \fBskein\fR are destroyed. + +Booting off of pools using \fBskein\fR is \fBNOT\fR supported +-- any attempt to enable \fBskein\fR on a root pool will fail with an +error. + +.RE + +.sp +.ne 2 +.na +\fB\fBedonr\fR\fR +.ad +.RS 4n +.TS +l l . +GUID org.illumos:edonr +READ\-ONLY COMPATIBLE no +DEPENDENCIES none +.TE + +This feature enables the use of the Edon-R hash algorithm for checksum, +including for nopwrite (if compression is also enabled, an overwrite of +a block whose checksum matches the data being written will be ignored). +In an abundance of caution, Edon-R can not be used with dedup +(without verification). + +Edon-R is a very high-performance hash algorithm that was part +of the NIST SHA-3 competition. 
It provides extremely high hash +performance (over 350% faster than SHA-256), but was not selected +because of its unsuitability as a general purpose secure hash algorithm. +This implementation utilizes the new salted checksumming functionality +in ZFS, which means that the checksum is pre-seeded with a secret +256-bit random key (stored on the pool) before being fed the data block +to be checksummed. Thus the produced checksums are unique to a given +pool. + +When the \fBedonr\fR feature is set to \fBenabled\fR, the administrator +can turn on the \fBedonr\fR checksum on any dataset using the +\fBzfs set checksum=edonr\fR(1M) command. This feature becomes +\fBactive\fR once a \fBchecksum\fR property has been set to \fBedonr\fR, +and will return to being \fBenabled\fR once all filesystems that have +ever had their checksum set to \fBedonr\fR are destroyed. + +Booting off of pools using \fBedonr\fR is \fBNOT\fR supported +-- any attempt to enable \fBedonr\fR on a root pool will fail with an +error. + .SH "SEE ALSO" \fBzpool\fR(8) diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index e13fc1a52..e543ba51d 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -837,12 +837,23 @@ The values \fBon\fR and \fBnoauto\fR are equivalent to the \fBauto\fR and \fBnoa .sp .ne 2 .na -\fB\fBchecksum\fR=\fBon\fR | \fBoff\fR | \fBfletcher2\fR | \fBfletcher4\fR | \fBsha256\fR\fR +\fB\fBchecksum\fR=\fBon\fR | \fBoff\fR | \fBfletcher2\fR | \fBfletcher4\fR | \fBsha256\fR | \fBnoparity\fR | \fBsha512\fR | \fBskein\fR | \fBedonr\fR\fR .ad .sp .6 .RS 4n -Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher4\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice. +Controls the checksum used to verify data integrity. 
The default value is +\fBon\fR, which automatically selects an appropriate algorithm (currently, +\fBfletcher4\fR, but this may change in future releases). The value \fBoff\fR +disables integrity checking on user data. The value \fBnoparity\fR not only +disables integrity checking but also disables maintaining parity for user data. +This setting is used internally by a dump device residing on a RAID-Z pool and +should not be used by any other dataset. Disabling checksums is \fBNOT\fR a +recommended practice. .sp +The \fBsha512\fR, \fBskein\fR, and \fBedonr\fR checksum algorithms require +enabling the appropriate features on the pool. Please see \fBzpool-features\fR(5) for +more information on these algorithms. + Changing this property affects only newly-written data. .RE diff --git a/module/icp/Makefile.in b/module/icp/Makefile.in index 4be03dbae..b822635b7 100644 --- a/module/icp/Makefile.in +++ b/module/icp/Makefile.in @@ -12,6 +12,7 @@ ASM_SOURCES += asm-x86_64/aes/aes_intel.o ASM_SOURCES += asm-x86_64/modes/gcm_intel.o ASM_SOURCES += asm-x86_64/sha1/sha1-x86_64.o ASM_SOURCES += asm-x86_64/sha2/sha256_impl.o +ASM_SOURCES += asm-x86_64/sha2/sha512_impl.o endif ifeq ($(TARGET_ASM_DIR), asm-i386) @@ -43,8 +44,10 @@ $(MODULE)-objs += core/kcf_mech_tabs.o $(MODULE)-objs += core/kcf_prov_lib.o $(MODULE)-objs += spi/kcf_spi.o $(MODULE)-objs += io/aes.o +$(MODULE)-objs += io/edonr_mod.o $(MODULE)-objs += io/sha1_mod.o $(MODULE)-objs += io/sha2_mod.o +$(MODULE)-objs += io/skein_mod.o $(MODULE)-objs += os/modhash.o $(MODULE)-objs += os/modconf.o $(MODULE)-objs += algs/modes/cbc.o @@ -55,8 +58,13 @@ $(MODULE)-objs += algs/modes/gcm.o $(MODULE)-objs += algs/modes/modes.o $(MODULE)-objs += algs/aes/aes_impl.o $(MODULE)-objs += algs/aes/aes_modes.o +$(MODULE)-objs += algs/edonr/edonr.o $(MODULE)-objs += algs/sha1/sha1.o $(MODULE)-objs += algs/sha2/sha2.o +$(MODULE)-objs += algs/sha1/sha1.o +$(MODULE)-objs += algs/skein/skein.o +$(MODULE)-objs += algs/skein/skein_block.o +$(MODULE)-objs += 
algs/skein/skein_iv.o $(MODULE)-objs += $(ASM_SOURCES) ICP_DIRS = \ @@ -67,9 +75,11 @@ ICP_DIRS = \ os \ algs \ algs/aes \ + algs/edonr \ algs/modes \ algs/sha1 \ algs/sha2 \ + algs/skein \ asm-x86_64 \ asm-x86_64/aes \ asm-x86_64/modes \ diff --git a/module/icp/algs/edonr/edonr.c b/module/icp/algs/edonr/edonr.c new file mode 100644 index 000000000..8ae989890 --- /dev/null +++ b/module/icp/algs/edonr/edonr.c @@ -0,0 +1,751 @@ +/* + * IDI,NTNU + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (C) 2009, 2010, Jorn Amundsen + * Tweaked Edon-R implementation for SUPERCOP, based on NIST API. 
+ * + * $Id: edonr.c 517 2013-02-17 20:34:39Z joern $ + */ +/* + * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved + */ + +/* determine where we can get bcopy/bzero declarations */ +#ifdef _KERNEL +#include +#else +#include +#endif +#include +#include + +/* big endian support, provides no-op's if run on little endian hosts */ +#include "edonr_byteorder.h" + +#define hashState224(x) ((x)->pipe->p256) +#define hashState256(x) ((x)->pipe->p256) +#define hashState384(x) ((x)->pipe->p512) +#define hashState512(x) ((x)->pipe->p512) + +/* shift and rotate shortcuts */ +#define shl(x, n) ((x) << n) +#define shr(x, n) ((x) >> n) + +#define rotl32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) +#define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) + +#define rotl64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) + +#if !defined(__C99_RESTRICT) +#define restrict /* restrict */ +#endif + +#define EDONR_VALID_HASHBITLEN(x) \ + ((x) == 512 || (x) == 384 || (x) == 256 || (x) == 224) + +/* EdonR224 initial double chaining pipe */ +static const uint32_t i224p2[16] = { + 0x00010203ul, 0x04050607ul, 0x08090a0bul, 0x0c0d0e0ful, + 0x10111213ul, 0x14151617ul, 0x18191a1bul, 0x1c1d1e1ful, + 0x20212223ul, 0x24252627ul, 0x28292a2bul, 0x2c2d2e2ful, + 0x30313233ul, 0x34353637ul, 0x38393a3bul, 0x3c3d3e3ful, +}; + +/* EdonR256 initial double chaining pipe */ +static const uint32_t i256p2[16] = { + 0x40414243ul, 0x44454647ul, 0x48494a4bul, 0x4c4d4e4ful, + 0x50515253ul, 0x54555657ul, 0x58595a5bul, 0x5c5d5e5ful, + 0x60616263ul, 0x64656667ul, 0x68696a6bul, 0x6c6d6e6ful, + 0x70717273ul, 0x74757677ul, 0x78797a7bul, 0x7c7d7e7ful, +}; + +/* EdonR384 initial double chaining pipe */ +static const uint64_t i384p2[16] = { + 0x0001020304050607ull, 0x08090a0b0c0d0e0full, + 0x1011121314151617ull, 0x18191a1b1c1d1e1full, + 0x2021222324252627ull, 0x28292a2b2c2d2e2full, + 0x3031323334353637ull, 0x38393a3b3c3d3e3full, + 0x4041424344454647ull, 
0x48494a4b4c4d4e4full, + 0x5051525354555657ull, 0x58595a5b5c5d5e5full, + 0x6061626364656667ull, 0x68696a6b6c6d6e6full, + 0x7071727374757677ull, 0x78797a7b7c7d7e7full +}; + +/* EdonR512 initial double chaining pipe */ +static const uint64_t i512p2[16] = { + 0x8081828384858687ull, 0x88898a8b8c8d8e8full, + 0x9091929394959697ull, 0x98999a9b9c9d9e9full, + 0xa0a1a2a3a4a5a6a7ull, 0xa8a9aaabacadaeafull, + 0xb0b1b2b3b4b5b6b7ull, 0xb8b9babbbcbdbebfull, + 0xc0c1c2c3c4c5c6c7ull, 0xc8c9cacbcccdcecfull, + 0xd0d1d2d3d4d5d6d7ull, 0xd8d9dadbdcdddedfull, + 0xe0e1e2e3e4e5e6e7ull, 0xe8e9eaebecedeeefull, + 0xf0f1f2f3f4f5f6f7ull, 0xf8f9fafbfcfdfeffull +}; + +/* + * First Latin Square + * 0 7 1 3 2 4 6 5 + * 4 1 7 6 3 0 5 2 + * 7 0 4 2 5 3 1 6 + * 1 4 0 5 6 2 7 3 + * 2 3 6 7 1 5 0 4 + * 5 2 3 1 7 6 4 0 + * 3 6 5 0 4 7 2 1 + * 6 5 2 4 0 1 3 7 + */ +#define LS1_256(c, x0, x1, x2, x3, x4, x5, x6, x7) \ +{ \ + uint32_t x04, x17, x23, x56, x07, x26; \ + x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \ + s0 = c + x07 + x2; \ + s1 = rotl32(x07 + x3, 4); \ + s2 = rotl32(x07 + x6, 8); \ + x23 = x2 + x3; \ + s5 = rotl32(x04 + x23 + x5, 22); \ + x56 = x5 + x6; \ + s6 = rotl32(x17 + x56 + x0, 24); \ + x26 = x23+x56; \ + s3 = rotl32(x26 + x7, 13); \ + s4 = rotl32(x26 + x1, 17); \ + s7 = rotl32(x26 + x4, 29); \ +} + +#define LS1_512(c, x0, x1, x2, x3, x4, x5, x6, x7) \ +{ \ + uint64_t x04, x17, x23, x56, x07, x26; \ + x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \ + s0 = c + x07 + x2; \ + s1 = rotl64(x07 + x3, 5); \ + s2 = rotl64(x07 + x6, 15); \ + x23 = x2 + x3; \ + s5 = rotl64(x04 + x23 + x5, 40); \ + x56 = x5 + x6; \ + s6 = rotl64(x17 + x56 + x0, 50); \ + x26 = x23+x56; \ + s3 = rotl64(x26 + x7, 22); \ + s4 = rotl64(x26 + x1, 31); \ + s7 = rotl64(x26 + x4, 59); \ +} + +/* + * Second Orthogonal Latin Square + * 0 4 2 3 1 6 5 7 + * 7 6 3 2 5 4 1 0 + * 5 3 1 6 0 2 7 4 + * 1 0 5 4 3 7 2 6 + * 2 1 0 7 4 5 6 3 + * 3 5 7 0 6 1 4 2 + * 4 7 6 1 2 0 3 5 + * 6 2 4 5 7 3 0 1 + */ +#define LS2_256(c, y0, y1, y2, y3, y4, 
y5, y6, y7) \ +{ \ + uint32_t y01, y25, y34, y67, y04, y05, y27, y37; \ + y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \ + t0 = ~c + y05 + y7; \ + t2 = rotl32(y05 + y3, 9); \ + y34 = y3+y4, y04 = y01+y34; \ + t1 = rotl32(y04 + y6, 5); \ + t4 = rotl32(y04 + y5, 15); \ + y67 = y6+y7, y37 = y34+y67; \ + t3 = rotl32(y37 + y2, 11); \ + t7 = rotl32(y37 + y0, 27); \ + y27 = y25+y67; \ + t5 = rotl32(y27 + y4, 20); \ + t6 = rotl32(y27 + y1, 25); \ +} + +#define LS2_512(c, y0, y1, y2, y3, y4, y5, y6, y7) \ +{ \ + uint64_t y01, y25, y34, y67, y04, y05, y27, y37; \ + y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \ + t0 = ~c + y05 + y7; \ + t2 = rotl64(y05 + y3, 19); \ + y34 = y3+y4, y04 = y01+y34; \ + t1 = rotl64(y04 + y6, 10); \ + t4 = rotl64(y04 + y5, 36); \ + y67 = y6+y7, y37 = y34+y67; \ + t3 = rotl64(y37 + y2, 29); \ + t7 = rotl64(y37 + y0, 55); \ + y27 = y25+y67; \ + t5 = rotl64(y27 + y4, 44); \ + t6 = rotl64(y27 + y1, 48); \ +} + +#define quasi_exform256(r0, r1, r2, r3, r4, r5, r6, r7) \ +{ \ + uint32_t s04, s17, s23, s56, t01, t25, t34, t67; \ + s04 = s0 ^ s4, t01 = t0 ^ t1; \ + r0 = (s04 ^ s1) + (t01 ^ t5); \ + t67 = t6 ^ t7; \ + r1 = (s04 ^ s7) + (t2 ^ t67); \ + s23 = s2 ^ s3; \ + r7 = (s23 ^ s5) + (t4 ^ t67); \ + t34 = t3 ^ t4; \ + r3 = (s23 ^ s4) + (t0 ^ t34); \ + s56 = s5 ^ s6; \ + r5 = (s3 ^ s56) + (t34 ^ t6); \ + t25 = t2 ^ t5; \ + r6 = (s2 ^ s56) + (t25 ^ t7); \ + s17 = s1 ^ s7; \ + r4 = (s0 ^ s17) + (t1 ^ t25); \ + r2 = (s17 ^ s6) + (t01 ^ t3); \ +} + +#define quasi_exform512(r0, r1, r2, r3, r4, r5, r6, r7) \ +{ \ + uint64_t s04, s17, s23, s56, t01, t25, t34, t67; \ + s04 = s0 ^ s4, t01 = t0 ^ t1; \ + r0 = (s04 ^ s1) + (t01 ^ t5); \ + t67 = t6 ^ t7; \ + r1 = (s04 ^ s7) + (t2 ^ t67); \ + s23 = s2 ^ s3; \ + r7 = (s23 ^ s5) + (t4 ^ t67); \ + t34 = t3 ^ t4; \ + r3 = (s23 ^ s4) + (t0 ^ t34); \ + s56 = s5 ^ s6; \ + r5 = (s3 ^ s56) + (t34 ^ t6); \ + t25 = t2 ^ t5; \ + r6 = (s2 ^ s56) + (t25 ^ t7); \ + s17 = s1 ^ s7; \ + r4 = (s0 ^ s17) + (t1 ^ t25); \ + r2 = (s17 ^ s6) + (t01 
^ t3); \ +} + +static size_t +Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p) +{ + size_t bl; + + for (bl = bitlen; bl >= EdonR256_BLOCK_BITSIZE; + bl -= EdonR256_BLOCK_BITSIZE, data += 16) { + uint32_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4, + t5, t6, t7; + uint32_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4, + q5, q6, q7; + const uint32_t defix = 0xaaaaaaaa; +#if defined(MACHINE_IS_BIG_ENDIAN) + uint32_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8, + swp9, swp10, swp11, swp12, swp13, swp14, swp15; +#define d(j) swp ## j +#define s32(j) ld_swap32((uint32_t *)data + j, swp ## j) +#else +#define d(j) data[j] +#endif + + /* First row of quasigroup e-transformations */ +#if defined(MACHINE_IS_BIG_ENDIAN) + s32(8); + s32(9); + s32(10); + s32(11); + s32(12); + s32(13); + s32(14); + s32(15); +#endif + LS1_256(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9), + d(8)); +#if defined(MACHINE_IS_BIG_ENDIAN) + s32(0); + s32(1); + s32(2); + s32(3); + s32(4); + s32(5); + s32(6); + s32(7); +#undef s32 +#endif + LS2_256(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7)); + quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); + + LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); + LS2_256(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14), + d(15)); + quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); + + /* Second row of quasigroup e-transformations */ + LS1_256(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14], + p[15]); + LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); + quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); + + LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); + LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7); + quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); + + /* Third row of quasigroup e-transformations */ + LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); + LS2_256(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]); + quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); + + LS1_256(defix, q0, q1, q2, q3, q4, q5, 
q6, q7); + LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); + quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); + + /* Fourth row of quasigroup e-transformations */ + LS1_256(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0)); + LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); + quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); + + LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); + LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7); + quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); + + /* Edon-R tweak on the original SHA-3 Edon-R submission. */ + p[0] ^= d(8) ^ p0; + p[1] ^= d(9) ^ p1; + p[2] ^= d(10) ^ p2; + p[3] ^= d(11) ^ p3; + p[4] ^= d(12) ^ p4; + p[5] ^= d(13) ^ p5; + p[6] ^= d(14) ^ p6; + p[7] ^= d(15) ^ p7; + p[8] ^= d(0) ^ q0; + p[9] ^= d(1) ^ q1; + p[10] ^= d(2) ^ q2; + p[11] ^= d(3) ^ q3; + p[12] ^= d(4) ^ q4; + p[13] ^= d(5) ^ q5; + p[14] ^= d(6) ^ q6; + p[15] ^= d(7) ^ q7; + } + +#undef d + return (bitlen - bl); +} + +/* + * Why is this #pragma here? + * + * Checksum functions like this one can go over the stack frame size check + * Linux imposes on 32-bit platforms (-Wframe-larger-than=1024). We can + * safely ignore the compiler error since we know that in ZoL, that + * the function will be called from a worker thread that won't be using + * much stack. The only function that goes over the 1k limit is Q512(), + * which only goes over it by a hair (1248 bytes on ARM32). 
+ */ +#include /* for _ILP32 */ +#ifdef _ILP32 /* We're 32-bit, assume small stack frames */ +#pragma GCC diagnostic ignored "-Wframe-larger-than=" +#endif + +#if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__) +static inline size_t +#else +static size_t +#endif +Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p) +{ + size_t bl; + + for (bl = bitlen; bl >= EdonR512_BLOCK_BITSIZE; + bl -= EdonR512_BLOCK_BITSIZE, data += 16) { + uint64_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4, + t5, t6, t7; + uint64_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4, + q5, q6, q7; + const uint64_t defix = 0xaaaaaaaaaaaaaaaaull; +#if defined(MACHINE_IS_BIG_ENDIAN) + uint64_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8, + swp9, swp10, swp11, swp12, swp13, swp14, swp15; +#define d(j) swp##j +#define s64(j) ld_swap64((uint64_t *)data+j, swp##j) +#else +#define d(j) data[j] +#endif + + /* First row of quasigroup e-transformations */ +#if defined(MACHINE_IS_BIG_ENDIAN) + s64(8); + s64(9); + s64(10); + s64(11); + s64(12); + s64(13); + s64(14); + s64(15); +#endif + LS1_512(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9), + d(8)); +#if defined(MACHINE_IS_BIG_ENDIAN) + s64(0); + s64(1); + s64(2); + s64(3); + s64(4); + s64(5); + s64(6); + s64(7); +#undef s64 +#endif + LS2_512(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7)); + quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); + + LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); + LS2_512(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14), + d(15)); + quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); + + /* Second row of quasigroup e-transformations */ + LS1_512(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14], + p[15]); + LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); + quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); + + LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); + LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); + quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); + + /* Third row 
of quasigroup e-transformations */ + LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); + LS2_512(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]); + quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); + + LS1_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); + LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); + quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); + + /* Fourth row of quasigroup e-transformations */ + LS1_512(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0)); + LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); + quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); + + LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); + LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); + quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); + + /* Edon-R tweak on the original SHA-3 Edon-R submission. */ + p[0] ^= d(8) ^ p0; + p[1] ^= d(9) ^ p1; + p[2] ^= d(10) ^ p2; + p[3] ^= d(11) ^ p3; + p[4] ^= d(12) ^ p4; + p[5] ^= d(13) ^ p5; + p[6] ^= d(14) ^ p6; + p[7] ^= d(15) ^ p7; + p[8] ^= d(0) ^ q0; + p[9] ^= d(1) ^ q1; + p[10] ^= d(2) ^ q2; + p[11] ^= d(3) ^ q3; + p[12] ^= d(4) ^ q4; + p[13] ^= d(5) ^ q5; + p[14] ^= d(6) ^ q6; + p[15] ^= d(7) ^ q7; + } + +#undef d + return (bitlen - bl); +} + +void +EdonRInit(EdonRState *state, size_t hashbitlen) +{ + ASSERT(EDONR_VALID_HASHBITLEN(hashbitlen)); + switch (hashbitlen) { + case 224: + state->hashbitlen = 224; + state->bits_processed = 0; + state->unprocessed_bits = 0; + bcopy(i224p2, hashState224(state)->DoublePipe, + 16 * sizeof (uint32_t)); + break; + + case 256: + state->hashbitlen = 256; + state->bits_processed = 0; + state->unprocessed_bits = 0; + bcopy(i256p2, hashState256(state)->DoublePipe, + 16 * sizeof (uint32_t)); + break; + + case 384: + state->hashbitlen = 384; + state->bits_processed = 0; + state->unprocessed_bits = 0; + bcopy(i384p2, hashState384(state)->DoublePipe, + 16 * sizeof (uint64_t)); + break; + + case 512: + state->hashbitlen = 512; + state->bits_processed = 0; + state->unprocessed_bits = 0; + bcopy(i512p2, 
hashState224(state)->DoublePipe, + 16 * sizeof (uint64_t)); + break; + } +} + + +void +EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen) +{ + uint32_t *data32; + uint64_t *data64; + + size_t bits_processed; + + ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen)); + switch (state->hashbitlen) { + case 224: + case 256: + if (state->unprocessed_bits > 0) { + /* LastBytes = databitlen / 8 */ + int LastBytes = (int)databitlen >> 3; + + ASSERT(state->unprocessed_bits + databitlen <= + EdonR256_BLOCK_SIZE * 8); + + bcopy(data, hashState256(state)->LastPart + + (state->unprocessed_bits >> 3), LastBytes); + state->unprocessed_bits += (int)databitlen; + databitlen = state->unprocessed_bits; + /* LINTED E_BAD_PTR_CAST_ALIGN */ + data32 = (uint32_t *)hashState256(state)->LastPart; + } else + /* LINTED E_BAD_PTR_CAST_ALIGN */ + data32 = (uint32_t *)data; + + bits_processed = Q256(databitlen, data32, + hashState256(state)->DoublePipe); + state->bits_processed += bits_processed; + databitlen -= bits_processed; + state->unprocessed_bits = (int)databitlen; + if (databitlen > 0) { + /* LastBytes = Ceil(databitlen / 8) */ + int LastBytes = + ((~(((-(int)databitlen) >> 3) & 0x01ff)) + + 1) & 0x01ff; + + data32 += bits_processed >> 5; /* byte size update */ + bcopy(data32, hashState256(state)->LastPart, LastBytes); + } + break; + + case 384: + case 512: + if (state->unprocessed_bits > 0) { + /* LastBytes = databitlen / 8 */ + int LastBytes = (int)databitlen >> 3; + + ASSERT(state->unprocessed_bits + databitlen <= + EdonR512_BLOCK_SIZE * 8); + + bcopy(data, hashState512(state)->LastPart + + (state->unprocessed_bits >> 3), LastBytes); + state->unprocessed_bits += (int)databitlen; + databitlen = state->unprocessed_bits; + /* LINTED E_BAD_PTR_CAST_ALIGN */ + data64 = (uint64_t *)hashState512(state)->LastPart; + } else + /* LINTED E_BAD_PTR_CAST_ALIGN */ + data64 = (uint64_t *)data; + + bits_processed = Q512(databitlen, data64, + hashState512(state)->DoublePipe); + 
state->bits_processed += bits_processed; + databitlen -= bits_processed; + state->unprocessed_bits = (int)databitlen; + if (databitlen > 0) { + /* LastBytes = Ceil(databitlen / 8) */ + int LastBytes = + ((~(((-(int)databitlen) >> 3) & 0x03ff)) + + 1) & 0x03ff; + + data64 += bits_processed >> 6; /* byte size update */ + bcopy(data64, hashState512(state)->LastPart, LastBytes); + } + break; + } +} + +void +EdonRFinal(EdonRState *state, uint8_t *hashval) +{ + uint32_t *data32; + uint64_t *data64, num_bits; + + size_t databitlen; + int LastByte, PadOnePosition; + + num_bits = state->bits_processed + state->unprocessed_bits; + ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen)); + switch (state->hashbitlen) { + case 224: + case 256: + LastByte = (int)state->unprocessed_bits >> 3; + PadOnePosition = 7 - (state->unprocessed_bits & 0x07); + hashState256(state)->LastPart[LastByte] = + (hashState256(state)->LastPart[LastByte] + & (0xff << (PadOnePosition + 1))) ^ + (0x01 << PadOnePosition); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + data64 = (uint64_t *)hashState256(state)->LastPart; + + if (state->unprocessed_bits < 448) { + (void) memset((hashState256(state)->LastPart) + + LastByte + 1, 0x00, + EdonR256_BLOCK_SIZE - LastByte - 9); + databitlen = EdonR256_BLOCK_SIZE * 8; +#if defined(MACHINE_IS_BIG_ENDIAN) + st_swap64(num_bits, data64 + 7); +#else + data64[7] = num_bits; +#endif + } else { + (void) memset((hashState256(state)->LastPart) + + LastByte + 1, 0x00, + EdonR256_BLOCK_SIZE * 2 - LastByte - 9); + databitlen = EdonR256_BLOCK_SIZE * 16; +#if defined(MACHINE_IS_BIG_ENDIAN) + st_swap64(num_bits, data64 + 15); +#else + data64[15] = num_bits; +#endif + } + + /* LINTED E_BAD_PTR_CAST_ALIGN */ + data32 = (uint32_t *)hashState256(state)->LastPart; + state->bits_processed += Q256(databitlen, data32, + hashState256(state)->DoublePipe); + break; + + case 384: + case 512: + LastByte = (int)state->unprocessed_bits >> 3; + PadOnePosition = 7 - (state->unprocessed_bits & 0x07); + 
hashState512(state)->LastPart[LastByte] = + (hashState512(state)->LastPart[LastByte] + & (0xff << (PadOnePosition + 1))) ^ + (0x01 << PadOnePosition); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + data64 = (uint64_t *)hashState512(state)->LastPart; + + if (state->unprocessed_bits < 960) { + (void) memset((hashState512(state)->LastPart) + + LastByte + 1, 0x00, + EdonR512_BLOCK_SIZE - LastByte - 9); + databitlen = EdonR512_BLOCK_SIZE * 8; +#if defined(MACHINE_IS_BIG_ENDIAN) + st_swap64(num_bits, data64 + 15); +#else + data64[15] = num_bits; +#endif + } else { + (void) memset((hashState512(state)->LastPart) + + LastByte + 1, 0x00, + EdonR512_BLOCK_SIZE * 2 - LastByte - 9); + databitlen = EdonR512_BLOCK_SIZE * 16; +#if defined(MACHINE_IS_BIG_ENDIAN) + st_swap64(num_bits, data64 + 31); +#else + data64[31] = num_bits; +#endif + } + + state->bits_processed += Q512(databitlen, data64, + hashState512(state)->DoublePipe); + break; + } + + switch (state->hashbitlen) { + case 224: { +#if defined(MACHINE_IS_BIG_ENDIAN) + uint32_t *d32 = (uint32_t *)hashval; + uint32_t *s32 = hashState224(state)->DoublePipe + 9; + int j; + + for (j = 0; j < EdonR224_DIGEST_SIZE >> 2; j++) + st_swap32(s32[j], d32 + j); +#else + bcopy(hashState256(state)->DoublePipe + 9, hashval, + EdonR224_DIGEST_SIZE); +#endif + break; + } + case 256: { +#if defined(MACHINE_IS_BIG_ENDIAN) + uint32_t *d32 = (uint32_t *)hashval; + uint32_t *s32 = hashState224(state)->DoublePipe + 8; + int j; + + for (j = 0; j < EdonR256_DIGEST_SIZE >> 2; j++) + st_swap32(s32[j], d32 + j); +#else + bcopy(hashState256(state)->DoublePipe + 8, hashval, + EdonR256_DIGEST_SIZE); +#endif + break; + } + case 384: { +#if defined(MACHINE_IS_BIG_ENDIAN) + uint64_t *d64 = (uint64_t *)hashval; + uint64_t *s64 = hashState384(state)->DoublePipe + 10; + int j; + + for (j = 0; j < EdonR384_DIGEST_SIZE >> 3; j++) + st_swap64(s64[j], d64 + j); +#else + bcopy(hashState384(state)->DoublePipe + 10, hashval, + EdonR384_DIGEST_SIZE); +#endif + break; + } + case 
512: { +#if defined(MACHINE_IS_BIG_ENDIAN) + uint64_t *d64 = (uint64_t *)hashval; + uint64_t *s64 = hashState512(state)->DoublePipe + 8; + int j; + + for (j = 0; j < EdonR512_DIGEST_SIZE >> 3; j++) + st_swap64(s64[j], d64 + j); +#else + bcopy(hashState512(state)->DoublePipe + 8, hashval, + EdonR512_DIGEST_SIZE); +#endif + break; + } + } +} + + +void +EdonRHash(size_t hashbitlen, const uint8_t *data, size_t databitlen, + uint8_t *hashval) +{ + EdonRState state; + + EdonRInit(&state, hashbitlen); + EdonRUpdate(&state, data, databitlen); + EdonRFinal(&state, hashval); +} + +#ifdef _KERNEL +EXPORT_SYMBOL(EdonRInit); +EXPORT_SYMBOL(EdonRUpdate); +EXPORT_SYMBOL(EdonRHash); +EXPORT_SYMBOL(EdonRFinal); +#endif diff --git a/module/icp/algs/edonr/edonr_byteorder.h b/module/icp/algs/edonr/edonr_byteorder.h new file mode 100644 index 000000000..d17e8f1fd --- /dev/null +++ b/module/icp/algs/edonr/edonr_byteorder.h @@ -0,0 +1,216 @@ +/* + * IDI,NTNU + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (C) 2009, 2010, Jorn Amundsen + * + * C header file to determine compile machine byte order. Take care when cross + * compiling. 
+ * + * $Id: byteorder.h 517 2013-02-17 20:34:39Z joern $ + */ +/* + * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved + */ + +#ifndef _CRYPTO_EDONR_BYTEORDER_H +#define _CRYPTO_EDONR_BYTEORDER_H + + +#include + +#if defined(__BYTE_ORDER) +#if (__BYTE_ORDER == __BIG_ENDIAN) +#define MACHINE_IS_BIG_ENDIAN +#elif (__BYTE_ORDER == __LITTLE_ENDIAN) +#define MACHINE_IS_LITTLE_ENDIAN +#endif +#elif defined(BYTE_ORDER) +#if (BYTE_ORDER == BIG_ENDIAN) +#define MACHINE_IS_BIG_ENDIAN +#elif (BYTE_ORDER == LITTLE_ENDIAN) +#define MACHINE_IS_LITTLE_ENDIAN +#endif +#endif /* __BYTE_ORDER || BYTE_ORDER */ + +#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN) +#if defined(_BIG_ENDIAN) || defined(_MIPSEB) +#define MACHINE_IS_BIG_ENDIAN +#endif +#if defined(_LITTLE_ENDIAN) || defined(_MIPSEL) +#define MACHINE_IS_LITTLE_ENDIAN +#endif +#endif /* !MACHINE_IS_BIG_ENDIAN && !MACHINE_IS_LITTLE_ENDIAN */ + +#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN) +#error unknown machine byte sex +#endif + +#define BYTEORDER_INCLUDED + +#if defined(MACHINE_IS_BIG_ENDIAN) +/* + * Byte swapping macros for big endian architectures and compilers, + * add as appropriate for other architectures and/or compilers. 
+ * + * ld_swap64(src,dst) : uint64_t dst = *(src) + * st_swap64(src,dst) : *(dst) = uint64_t src + */ + +#if defined(__PPC__) || defined(_ARCH_PPC) + +#if defined(__64BIT__) +#if defined(_ARCH_PWR7) +#define aix_ld_swap64(s64, d64)\ + __asm__("ldbrx %0,0,%1" : "=r"(d64) : "r"(s64)) +#define aix_st_swap64(s64, d64)\ + __asm__ volatile("stdbrx %1,0,%0" : : "r"(d64), "r"(s64)) +#else +#define aix_ld_swap64(s64, d64) \ +{ \ + uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */ \ + \ + __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0;rldimi %1,%2,32,0"\ + : "+r"(s4), "=r"(d64), "=r"(h) : "b"(s64)); \ +} + +#define aix_st_swap64(s64, d64) \ +{ \ + uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */ \ + h = (s64) >> 32; \ + __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0" \ + : "+r"(s4) : "r"(s64), "r"(h), "b"(d64)); \ +} +#endif /* 64BIT && PWR7 */ +#else +#define aix_ld_swap64(s64, d64) \ +{ \ + uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\ + __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0" \ + : "+r"(s4), "=r"(l), "=r"(h) : "b"(s64)); \ + d64 = ((uint64_t)h<<32) | l; \ +} + +#define aix_st_swap64(s64, d64) \ +{ \ + uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\ + l = (s64) & 0xfffffffful, h = (s64) >> 32; \ + __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0" \ + : "+r"(s4) : "r"(l), "r"(h), "b"(d64)); \ +} +#endif /* __64BIT__ */ +#define aix_ld_swap32(s32, d32)\ + __asm__("lwbrx %0,0,%1" : "=r"(d32) : "r"(s32)) +#define aix_st_swap32(s32, d32)\ + __asm__ volatile("stwbrx %1,0,%0" : : "r"(d32), "r"(s32)) +#define ld_swap32(s, d) aix_ld_swap32(s, d) +#define st_swap32(s, d) aix_st_swap32(s, d) +#define ld_swap64(s, d) aix_ld_swap64(s, d) +#define st_swap64(s, d) aix_st_swap64(s, d) +#endif /* __PPC__ || _ARCH_PPC */ + +#if defined(__sparc) +#if !defined(__arch64__) && !defined(__sparcv8) && defined(__sparcv9) +#define __arch64__ +#endif +#if defined(__GNUC__) || (defined(__SUNPRO_C) 
&& __SUNPRO_C > 0x590) +/* need Sun Studio C 5.10 and above for GNU inline assembly */ +#if defined(__arch64__) +#define sparc_ld_swap64(s64, d64) \ + __asm__("ldxa [%1]0x88,%0" : "=r"(d64) : "r"(s64)) +#define sparc_st_swap64(s64, d64) \ + __asm__ volatile("stxa %0,[%1]0x88" : : "r"(s64), "r"(d64)) +#define st_swap64(s, d) sparc_st_swap64(s, d) +#else /* !__arch64__: access the two 32-bit halves separately */ +#define sparc_ld_swap64(s64, d64) \ +{ \ + uint32_t *s4 = 0, h, l; /* initialized: asm operand is read-write */ \ + __asm__("add %3,4,%0\n\tlda [%3]0x88,%1\n\tlda [%0]0x88,%2" \ + : "+r"(s4), "=r"(l), "=r"(h) : "r"(s64)); \ + d64 = ((uint64_t)h<<32) | l; \ +} +#define sparc_st_swap64(s64, d64) \ +{ \ + uint32_t *s4 = 0, h, l; /* initialized: asm operand is read-write */ \ + l = (s64) & 0xfffffffful, h = (s64) >> 32; \ + __asm__ volatile("add %3,4,%0\n\tsta %1,[%3]0x88\n\tsta %2,[%0]0x88"\ + : "+r"(s4) : "r"(l), "r"(h), "r"(d64)); \ +} +#endif /* sparc64 */ +#define sparc_ld_swap32(s32, d32)\ + __asm__("lda [%1]0x88,%0" : "=r"(d32) : "r"(s32)) +#define sparc_st_swap32(s32, d32)\ + __asm__ volatile("sta %0,[%1]0x88" : : "r"(s32), "r"(d32)) +#define ld_swap32(s, d) sparc_ld_swap32(s, d) +#define st_swap32(s, d) sparc_st_swap32(s, d) +#define ld_swap64(s, d) sparc_ld_swap64(s, d) +#define st_swap64(s, d) sparc_st_swap64(s, d) +#endif /* GCC || Sun Studio C > 5.9 */ +#endif /* sparc */ + +/* GCC fallback */ +#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap32) +#define ld_swap32(s, d) (d = __builtin_bswap32(*(s))) +#define st_swap32(s, d) (*(d) = __builtin_bswap32(s)) +#endif /* GCC4/PGIC && !swap32 */ +#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap64) +#define ld_swap64(s, d) (d = __builtin_bswap64(*(s))) +#define st_swap64(s, d) (*(d) = __builtin_bswap64(s)) +#endif /* GCC4/PGIC && !swap64 */ + +/* generic fallback */ +#if !defined(ld_swap32) +#define ld_swap32(s, d) \ + (d = (*(s) >> 24) | (*(s) >> 8 & 0xff00) | \ + (*(s) << 8 & 0xff0000) | (*(s) << 24)) +#define st_swap32(s, d) \ + (*(d) = ((s) >> 24) | ((s) >> 8 & 0xff00) | \ + ((s) << 8 & 0xff0000) | ((s) << 24)) +#endif +#if
!defined(ld_swap64) +#define ld_swap64(s, d) \ + (d = (*(s) >> 56) | (*(s) >> 40 & 0xff00) | \ + (*(s) >> 24 & 0xff0000) | (*(s) >> 8 & 0xff000000) | \ + (*(s) & 0xff000000) << 8 | (*(s) & 0xff0000) << 24 | \ + (*(s) & 0xff00) << 40 | *(s) << 56) +#define st_swap64(s, d) \ + (*(d) = ((s) >> 56) | ((s) >> 40 & 0xff00) | \ + ((s) >> 24 & 0xff0000) | ((s) >> 8 & 0xff000000) | \ + ((s) & 0xff000000) << 8 | ((s) & 0xff0000) << 24 | \ + ((s) & 0xff00) << 40 | (s) << 56) +#endif + +#endif /* MACHINE_IS_BIG_ENDIAN */ + + +#if defined(MACHINE_IS_LITTLE_ENDIAN) +/* replace swaps with simple assignments on little endian systems */ +#undef ld_swap32 +#undef st_swap32 +#define ld_swap32(s, d) (d = *(s)) +#define st_swap32(s, d) (*(d) = s) +#undef ld_swap64 +#undef st_swap64 +#define ld_swap64(s, d) (d = *(s)) +#define st_swap64(s, d) (*(d) = s) +#endif /* MACHINE_IS_LITTLE_ENDIAN */ + +#endif /* _CRYPTO_EDONR_BYTEORDER_H */ diff --git a/module/icp/algs/sha2/sha2.c b/module/icp/algs/sha2/sha2.c index 792ca8825..dbe008190 100644 --- a/module/icp/algs/sha2/sha2.c +++ b/module/icp/algs/sha2/sha2.c @@ -38,7 +38,7 @@ #include #define _SHA2_IMPL -#include +#include #include #define _RESTRICT_KYWD @@ -47,18 +47,37 @@ #include #define HAVE_HTONL #endif +#include /* for _ILP32 */ static void Encode(uint8_t *, uint32_t *, size_t); +static void Encode64(uint8_t *, uint64_t *, size_t); #if defined(__amd64) +#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1) #define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1) + +void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num); void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num); + #else static void SHA256Transform(SHA2_CTX *, const uint8_t *); +static void SHA512Transform(SHA2_CTX *, const uint8_t *); #endif /* __amd64 */ static uint8_t PADDING[128] = { 0x80, /* all zeros */ }; +/* + * The low-level checksum routines use a lot of stack space. 
On systems where + * small stacks are enforced (like 32-bit kernel builds), insert compiler memory + * barriers to reduce stack frame size. This can reduce the SHA512Transform() + * stack frame usage from 3k to <1k on ARM32, for example. + */ +#if defined(_ILP32) || defined(__powerpc) /* small stack */ +#define SMALL_STACK_MEMORY_BARRIER asm volatile("": : :"memory"); +#else +#define SMALL_STACK_MEMORY_BARRIER +#endif + /* Ch and Maj are the basic SHA2 functions. */ #define Ch(b, c, d) (((b) & (c)) ^ ((~b) & (d))) #define Maj(b, c, d) (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d))) @@ -82,6 +101,18 @@ static uint8_t PADDING[128] = { 0x80, /* all zeros */ }; T2 = BIGSIGMA0_256(a) + Maj(a, b, c); \ h = T1 + T2 +/* SHA384/512 Functions */ +#define BIGSIGMA0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39)) +#define BIGSIGMA1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41)) +#define SIGMA0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7)) +#define SIGMA1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6)) +#define SHA512ROUND(a, b, c, d, e, f, g, h, i, w) \ + T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w; \ + d += T1; \ + T2 = BIGSIGMA0(a) + Maj(a, b, c); \ + h = T1 + T2; \ + SMALL_STACK_MEMORY_BARRIER; + /* * sparc optimization: * @@ -130,6 +161,33 @@ SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk) uint32_t w8, w9, w10, w11, w12, w13, w14, w15; uint32_t T1, T2; +#if defined(__sparc) + static const uint32_t sha256_consts[] = { + SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2, + SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5, + SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8, + SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11, + SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14, + SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17, + SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20, + SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23, + SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26, + SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29, + SHA256_CONST_30, 
SHA256_CONST_31, SHA256_CONST_32, + SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35, + SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38, + SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41, + SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44, + SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47, + SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50, + SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53, + SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56, + SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59, + SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62, + SHA256_CONST_63 + }; +#endif /* __sparc */ + if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */ bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32)); blk = (uint8_t *)ctx->buf_un.buf32; @@ -292,6 +350,256 @@ SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk) ctx->state.s32[6] += g; ctx->state.s32[7] += h; } + + +/* SHA384 and SHA512 Transform */ + +static void +SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk) +{ + + uint64_t a = ctx->state.s64[0]; + uint64_t b = ctx->state.s64[1]; + uint64_t c = ctx->state.s64[2]; + uint64_t d = ctx->state.s64[3]; + uint64_t e = ctx->state.s64[4]; + uint64_t f = ctx->state.s64[5]; + uint64_t g = ctx->state.s64[6]; + uint64_t h = ctx->state.s64[7]; + + uint64_t w0, w1, w2, w3, w4, w5, w6, w7; + uint64_t w8, w9, w10, w11, w12, w13, w14, w15; + uint64_t T1, T2; + +#if defined(__sparc) + static const uint64_t sha512_consts[] = { + SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2, + SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5, + SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8, + SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11, + SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14, + SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17, + SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20, + SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23, + SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26, + SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29, + SHA512_CONST_30, 
SHA512_CONST_31, SHA512_CONST_32, + SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35, + SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38, + SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41, + SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44, + SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47, + SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50, + SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53, + SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56, + SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59, + SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62, + SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65, + SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68, + SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71, + SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74, + SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77, + SHA512_CONST_78, SHA512_CONST_79 + }; +#endif /* __sparc */ + + + if ((uintptr_t)blk & 0x7) { /* not 8-byte aligned? */ + bcopy(blk, ctx->buf_un.buf64, sizeof (ctx->buf_un.buf64)); + blk = (uint8_t *)ctx->buf_un.buf64; + } + + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w0 = LOAD_BIG_64(blk + 8 * 0); + SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w1 = LOAD_BIG_64(blk + 8 * 1); + SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w2 = LOAD_BIG_64(blk + 8 * 2); + SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w3 = LOAD_BIG_64(blk + 8 * 3); + SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w4 = LOAD_BIG_64(blk + 8 * 4); + SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w5 = LOAD_BIG_64(blk + 8 * 5); + SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w6 = LOAD_BIG_64(blk + 8 * 6); + SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w7 = LOAD_BIG_64(blk + 8 * 7); + SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7); + /* LINTED 
E_BAD_PTR_CAST_ALIGN */ + w8 = LOAD_BIG_64(blk + 8 * 8); + SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w9 = LOAD_BIG_64(blk + 8 * 9); + SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w10 = LOAD_BIG_64(blk + 8 * 10); + SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w11 = LOAD_BIG_64(blk + 8 * 11); + SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w12 = LOAD_BIG_64(blk + 8 * 12); + SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w13 = LOAD_BIG_64(blk + 8 * 13); + SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w14 = LOAD_BIG_64(blk + 8 * 14); + SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + w15 = LOAD_BIG_64(blk + 8 * 15); + SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15); + + w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0; + SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0); + w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1; + SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1); + w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; + SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2); + w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; + SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3); + w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; + SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4); + w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; + SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5); + w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6; + SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6); + w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; + SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7); + w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8; + SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8); + w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; + SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9); + w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; + SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10); + w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; + SHA512ROUND(f, g, h, a, b, 
c, d, e, 27, w11); + w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12; + SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12); + w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; + SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13); + w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; + SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14); + w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; + SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15); + + w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0; + SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0); + w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1; + SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1); + w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; + SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2); + w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; + SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3); + w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; + SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4); + w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; + SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5); + w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6; + SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6); + w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; + SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7); + w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8; + SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8); + w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; + SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9); + w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; + SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10); + w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; + SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11); + w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12; + SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12); + w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; + SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13); + w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; + SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14); + w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; + SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15); + + w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0; + SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0); + w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1; + 
SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1); + w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; + SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2); + w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; + SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3); + w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; + SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4); + w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; + SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5); + w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6; + SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6); + w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; + SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7); + w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8; + SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8); + w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; + SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9); + w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; + SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10); + w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; + SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11); + w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12; + SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12); + w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; + SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13); + w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; + SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14); + w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; + SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15); + + w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0; + SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0); + w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1; + SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1); + w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; + SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2); + w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; + SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3); + w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; + SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4); + w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; + SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5); + w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6; + SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6); + w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; 
+ SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7); + w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8; + SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8); + w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; + SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9); + w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; + SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10); + w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; + SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11); + w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12; + SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12); + w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; + SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13); + w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; + SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14); + w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; + SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15); + + ctx->state.s64[0] += a; + ctx->state.s64[1] += b; + ctx->state.s64[2] += c; + ctx->state.s64[3] += d; + ctx->state.s64[4] += e; + ctx->state.s64[5] += f; + ctx->state.s64[6] += g; + ctx->state.s64[7] += h; + +} #endif /* !__amd64 */ @@ -311,14 +619,56 @@ Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input, { size_t i, j; - for (i = 0, j = 0; j < len; i++, j += 4) { - output[j] = (input[i] >> 24) & 0xff; - output[j + 1] = (input[i] >> 16) & 0xff; - output[j + 2] = (input[i] >> 8) & 0xff; - output[j + 3] = input[i] & 0xff; +#if defined(__sparc) + if (IS_P2ALIGNED(output, sizeof (uint32_t))) { + for (i = 0, j = 0; j < len; i++, j += 4) { + /* LINTED E_BAD_PTR_CAST_ALIGN */ + *((uint32_t *)(output + j)) = input[i]; + } + } else { +#endif /* little endian -- will work on big endian, but slowly */ + for (i = 0, j = 0; j < len; i++, j += 4) { + output[j] = (input[i] >> 24) & 0xff; + output[j + 1] = (input[i] >> 16) & 0xff; + output[j + 2] = (input[i] >> 8) & 0xff; + output[j + 3] = input[i] & 0xff; + } +#if defined(__sparc) } +#endif } +static void +Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input, + size_t len) +{ + size_t i, j; + +#if defined(__sparc) + 
if (IS_P2ALIGNED(output, sizeof (uint64_t))) { + for (i = 0, j = 0; j < len; i++, j += 8) { + /* LINTED E_BAD_PTR_CAST_ALIGN */ + *((uint64_t *)(output + j)) = input[i]; + } + } else { +#endif /* little endian -- will work on big endian, but slowly */ + for (i = 0, j = 0; j < len; i++, j += 8) { + + output[j] = (input[i] >> 56) & 0xff; + output[j + 1] = (input[i] >> 48) & 0xff; + output[j + 2] = (input[i] >> 40) & 0xff; + output[j + 3] = (input[i] >> 32) & 0xff; + output[j + 4] = (input[i] >> 24) & 0xff; + output[j + 5] = (input[i] >> 16) & 0xff; + output[j + 6] = (input[i] >> 8) & 0xff; + output[j + 7] = input[i] & 0xff; + } +#if defined(__sparc) + } +#endif +} + + void SHA2Init(uint64_t mech, SHA2_CTX *ctx) { @@ -336,22 +686,86 @@ SHA2Init(uint64_t mech, SHA2_CTX *ctx) ctx->state.s32[6] = 0x1f83d9abU; ctx->state.s32[7] = 0x5be0cd19U; break; + case SHA384_MECH_INFO_TYPE: + case SHA384_HMAC_MECH_INFO_TYPE: + case SHA384_HMAC_GEN_MECH_INFO_TYPE: + ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL; + ctx->state.s64[1] = 0x629a292a367cd507ULL; + ctx->state.s64[2] = 0x9159015a3070dd17ULL; + ctx->state.s64[3] = 0x152fecd8f70e5939ULL; + ctx->state.s64[4] = 0x67332667ffc00b31ULL; + ctx->state.s64[5] = 0x8eb44a8768581511ULL; + ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL; + ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL; + break; + case SHA512_MECH_INFO_TYPE: + case SHA512_HMAC_MECH_INFO_TYPE: + case SHA512_HMAC_GEN_MECH_INFO_TYPE: + ctx->state.s64[0] = 0x6a09e667f3bcc908ULL; + ctx->state.s64[1] = 0xbb67ae8584caa73bULL; + ctx->state.s64[2] = 0x3c6ef372fe94f82bULL; + ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL; + ctx->state.s64[4] = 0x510e527fade682d1ULL; + ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL; + ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL; + ctx->state.s64[7] = 0x5be0cd19137e2179ULL; + break; + case SHA512_224_MECH_INFO_TYPE: + ctx->state.s64[0] = 0x8C3D37C819544DA2ULL; + ctx->state.s64[1] = 0x73E1996689DCD4D6ULL; + ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL; + ctx->state.s64[3] = 
0x679DD514582F9FCFULL; + ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL; + ctx->state.s64[5] = 0x77E36F7304C48942ULL; + ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL; + ctx->state.s64[7] = 0x1112E6AD91D692A1ULL; + break; + case SHA512_256_MECH_INFO_TYPE: + ctx->state.s64[0] = 0x22312194FC2BF72CULL; + ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL; + ctx->state.s64[2] = 0x2393B86B6F53B151ULL; + ctx->state.s64[3] = 0x963877195940EABDULL; + ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL; + ctx->state.s64[5] = 0xBE5E1E2553863992ULL; + ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL; + ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL; + break; +#ifdef _KERNEL /* NOTE(review): no default case when !_KERNEL -- confirm */ default: cmn_err(CE_PANIC, "sha2_init: failed to find a supported algorithm: 0x%x", (uint32_t)mech); + +#endif /* _KERNEL */ } ctx->algotype = (uint32_t)mech; ctx->count.c64[0] = ctx->count.c64[1] = 0; } +#ifndef _KERNEL + +// #pragma inline(SHA256Init, SHA384Init, SHA512Init) void SHA256Init(SHA256_CTX *ctx) { SHA2Init(SHA256, ctx); } +void +SHA384Init(SHA384_CTX *ctx) +{ + SHA2Init(SHA384, ctx); +} + +void +SHA512Init(SHA512_CTX *ctx) +{ + SHA2Init(SHA512, ctx); +} + +#endif /* !_KERNEL */ + /* * SHA2Update() * @@ -422,6 +836,8 @@ SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len) bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len); if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) SHA256Transform(ctx, ctx->buf_un.buf8); + else + SHA512Transform(ctx, ctx->buf_un.buf8); i = buf_len; } @@ -431,6 +847,10 @@ SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len) for (; i + buf_limit - 1 < input_len; i += buf_limit) { SHA256Transform(ctx, &input[i]); } + } else { + for (; i + buf_limit - 1 < input_len; i += buf_limit) { + SHA512Transform(ctx, &input[i]); + } } #else @@ -441,6 +861,13 @@ SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len) block_count); i += block_count << 6; } + } else { + block_count = (input_len - i) >> 7; + if (block_count > 0) { + SHA512TransformBlocks(ctx, &input[i], + block_count); + i += block_count
<< 7; + } } #endif /* !__amd64 */ @@ -479,6 +906,7 @@ void SHA2Final(void *digest, SHA2_CTX *ctx) { uint8_t bitcount_be[sizeof (ctx->count.c32)]; + uint8_t bitcount_be64[sizeof (ctx->count.c64)]; uint32_t index; uint32_t algotype = ctx->algotype; @@ -488,8 +916,45 @@ SHA2Final(void *digest, SHA2_CTX *ctx) SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index); SHA2Update(ctx, bitcount_be, sizeof (bitcount_be)); Encode(digest, ctx->state.s32, sizeof (ctx->state.s32)); + } else { + index = (ctx->count.c64[1] >> 3) & 0x7f; + Encode64(bitcount_be64, ctx->count.c64, + sizeof (bitcount_be64)); + SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index); + SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64)); + if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) { + ctx->state.s64[6] = ctx->state.s64[7] = 0; + Encode64(digest, ctx->state.s64, + sizeof (uint64_t) * 6); + } else if (algotype == SHA512_224_MECH_INFO_TYPE) { + uint8_t last[sizeof (uint64_t)]; + /* + * Since SHA-512/224 doesn't align well to 64-bit + * boundaries, we must do the encoding in three steps: + * 1) encode the three 64-bit words that fit neatly + * 2) encode the last 64-bit word to a temp buffer + * 3) take the first 4 bytes of the temp buffer (the + * high 32 bits of that word) and append to the digest + */ + Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3); + Encode64(last, &ctx->state.s64[3], sizeof (uint64_t)); + bcopy(last, (uint8_t *)digest + 24, 4); + } else if (algotype == SHA512_256_MECH_INFO_TYPE) { + Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4); + } else { + Encode64(digest, ctx->state.s64, + sizeof (ctx->state.s64)); + } } /* zeroize sensitive information */ bzero(ctx, sizeof (*ctx)); } + + + +#ifdef _KERNEL +EXPORT_SYMBOL(SHA2Init); +EXPORT_SYMBOL(SHA2Update); +EXPORT_SYMBOL(SHA2Final); +#endif diff --git a/module/icp/algs/skein/THIRDPARTYLICENSE b/module/icp/algs/skein/THIRDPARTYLICENSE new file mode 100644 index 000000000..b7434fd17 --- /dev/null +++
b/module/icp/algs/skein/THIRDPARTYLICENSE @@ -0,0 +1,3 @@ +Implementation of the Skein hash function. +Source code author: Doug Whiting, 2008. +This algorithm and source code is released to the public domain. diff --git a/module/icp/algs/skein/THIRDPARTYLICENSE.descrip b/module/icp/algs/skein/THIRDPARTYLICENSE.descrip new file mode 100644 index 000000000..0ae89cfdf --- /dev/null +++ b/module/icp/algs/skein/THIRDPARTYLICENSE.descrip @@ -0,0 +1 @@ +LICENSE TERMS OF SKEIN HASH ALGORITHM IMPLEMENTATION diff --git a/module/icp/algs/skein/skein.c b/module/icp/algs/skein/skein.c new file mode 100644 index 000000000..0981eee08 --- /dev/null +++ b/module/icp/algs/skein/skein.c @@ -0,0 +1,921 @@ +/* + * Implementation of the Skein hash function. + * Source code author: Doug Whiting, 2008. + * This algorithm and source code is released to the public domain. + */ +/* Copyright 2013 Doug Whiting. This code is released to the public domain. */ + +#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */ + +#include +#include +#include /* get the Skein API definitions */ +#include "skein_impl.h" /* get internal definitions */ + +/* External function to process blkCnt (nonzero) full block(s) of data. 
*/ +void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr, + size_t blkCnt, size_t byteCntAdd); +void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr, + size_t blkCnt, size_t byteCntAdd); +void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr, + size_t blkCnt, size_t byteCntAdd); + +/* 256-bit Skein */ +/* init the context for a straight hashing operation */ +int +Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen) +{ + union { + uint8_t b[SKEIN_256_STATE_BYTES]; + uint64_t w[SKEIN_256_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 256: + bcopy(SKEIN_256_IV_256, ctx->X, sizeof (ctx->X)); + break; + case 224: + bcopy(SKEIN_256_IV_224, ctx->X, sizeof (ctx->X)); + break; + case 160: + bcopy(SKEIN_256_IV_160, ctx->X, sizeof (ctx->X)); + break; + case 128: + bcopy(SKEIN_256_IV_128, ctx->X, sizeof (ctx->X)); + break; +#endif + default: + /* here if there is no precomputed IV value available */ + /* + * build/process the config block, type == CONFIG (could be + * precomputed) + */ + /* set tweaks: T0=0; T1=CFG | FINAL */ + Skein_Start_New_Type(ctx, CFG_FINAL); + + /* set the schema, version */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + /* hash result length in bits */ + cfg.w[1] = Skein_Swap64(hashBitLen); + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + /* zero pad config block */ + bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0])); + + /* compute the initial chaining values from config block */ + /* zero the chaining variables */ + bzero(ctx->X, sizeof (ctx->X)); + Skein_256_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); + break; + } + /* + * The chaining vars ctx->X are now initialized for the given + * hashBitLen. 
+ * Set up to process the data message portion of the hash (default) + */ + Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */ + + return (SKEIN_SUCCESS); +} + +/* init the context for a MAC and/or tree hash operation */ +/* + * [identical to Skein_256_Init() when keyBytes == 0 && + * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] + */ +int +Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, + const uint8_t *key, size_t keyBytes) +{ + union { + uint8_t b[SKEIN_256_STATE_BYTES]; + uint64_t w[SKEIN_256_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) { /* is there a key? */ + /* no key: use all zeroes as key for config block */ + bzero(ctx->X, sizeof (ctx->X)); + } else { /* here to pre-process a key */ + + Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X)); + /* do a mini-Init right here */ + /* set output hash bit count = state size */ + ctx->h.hashBitLen = 8 * sizeof (ctx->X); + /* set tweaks: T0 = 0; T1 = KEY type */ + Skein_Start_New_Type(ctx, KEY); + /* zero the initial chaining variables */ + bzero(ctx->X, sizeof (ctx->X)); + /* hash the key */ + (void) Skein_256_Update(ctx, key, keyBytes); + /* put result into cfg.b[] */ + (void) Skein_256_Final_Pad(ctx, cfg.b); + /* copy into ctx->X[]; NOTE(review): assumes equal sizes */ + bcopy(cfg.b, ctx->X, sizeof (cfg.b)); +#if SKEIN_NEED_SWAP + { + uint_t i; + /* convert key bytes to context words */ + for (i = 0; i < SKEIN_256_STATE_WORDS; i++) + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* + * build/process the config block, type == CONFIG (could be + * precomputed for each key) + */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx, CFG_FINAL); + + bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] =
Skein_Swap64(hashBitLen); /* hash result length in bits */ + /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + cfg.w[2] = Skein_Swap64(treeInfo); + + Skein_Show_Key(256, &ctx->h, key, keyBytes); + + /* compute the initial chaining values from config block */ + Skein_256_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx, MSG); + + return (SKEIN_SUCCESS); +} + +/* process the input bytes */ +int +Skein_256_Update(Skein_256_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt) +{ + size_t n; + + /* catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL); + + /* process full blocks, if any */ + if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES) { + /* finish up any buffered message data */ + if (ctx->h.bCnt) { + /* # bytes free in buffer b[] */ + n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt; + if (n) { + /* check on our logic here */ + Skein_assert(n < msgByteCnt); + bcopy(msg, &ctx->b[ctx->h.bCnt], n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES); + Skein_256_Process_Block(ctx, ctx->b, 1, + SKEIN_256_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* + * now process any remaining full blocks, directly from input + * message data + */ + if (msgByteCnt > SKEIN_256_BLOCK_BYTES) { + /* # of full blocks; the -1 keeps >= 1 byte buffered */ + n = (msgByteCnt - 1) / SKEIN_256_BLOCK_BYTES; + Skein_256_Process_Block(ctx, msg, n, + SKEIN_256_BLOCK_BYTES); + msgByteCnt -= n * SKEIN_256_BLOCK_BYTES; + msg += n * SKEIN_256_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES); + bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt); + ctx->h.bCnt +=
msgByteCnt; + } + + return (SKEIN_SUCCESS); +} + +/* finalize the hash computation and output the result */ +int +Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal) +{ + size_t i, n, byteCnt; + uint64_t X[SKEIN_256_STATE_WORDS]; + + /* catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL); + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + /* zero pad b[] if necessary */ + if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) + bzero(&ctx->b[ctx->h.bCnt], + SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); + + /* process the final block */ + Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); + + /* now output the result */ + /* total number of output bytes */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; + + /* run Threefish in "counter mode" to generate output */ + /* zero out b[], so it can hold the counter */ + bzero(ctx->b, sizeof (ctx->b)); + /* keep a local copy of counter mode "key" */ + bcopy(ctx->X, X, sizeof (X)); + for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) { + /* build the counter block */ + uint64_t tmp = Skein_Swap64((uint64_t)i); + bcopy(&tmp, ctx->b, sizeof (tmp)); + Skein_Start_New_Type(ctx, OUT_FINAL); + /* run "counter mode" */ + Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); + /* number of output bytes left to go */ + n = byteCnt - i * SKEIN_256_BLOCK_BYTES; + if (n >= SKEIN_256_BLOCK_BYTES) + n = SKEIN_256_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal + i * SKEIN_256_BLOCK_BYTES, + ctx->X, n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256, &ctx->h, n, + hashVal + i * SKEIN_256_BLOCK_BYTES); + /* restore the counter mode key for next time */ + bcopy(X, ctx->X, sizeof (X)); + } + return (SKEIN_SUCCESS); +} + +/* 512-bit Skein */ + +/* init the context for a straight hashing operation */ +int +Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen) +{ + union { + uint8_t b[SKEIN_512_STATE_BYTES]; + uint64_t w[SKEIN_512_STATE_WORDS]; + } cfg; /* config block */ + + 
Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 512: + bcopy(SKEIN_512_IV_512, ctx->X, sizeof (ctx->X)); + break; + case 384: + bcopy(SKEIN_512_IV_384, ctx->X, sizeof (ctx->X)); + break; + case 256: + bcopy(SKEIN_512_IV_256, ctx->X, sizeof (ctx->X)); + break; + case 224: + bcopy(SKEIN_512_IV_224, ctx->X, sizeof (ctx->X)); + break; +#endif + default: + /* + * here if there is no precomputed IV value available + * build/process the config block, type == CONFIG (could be + * precomputed) + */ + /* set tweaks: T0=0; T1=CFG | FINAL */ + Skein_Start_New_Type(ctx, CFG_FINAL); + + /* set the schema, version */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + /* hash result length in bits */ + cfg.w[1] = Skein_Swap64(hashBitLen); + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + /* zero pad config block */ + bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0])); + + /* compute the initial chaining values from config block */ + /* zero the chaining variables */ + bzero(ctx->X, sizeof (ctx->X)); + Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); + break; + } + + /* + * The chaining vars ctx->X are now initialized for the given + * hashBitLen. 
Set up to process the data message portion of the + * hash (default) + */ + Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */ + + return (SKEIN_SUCCESS); +} + +/* init the context for a MAC and/or tree hash operation */ +/* + * [identical to Skein_512_Init() when keyBytes == 0 && + * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] + */ +int +Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, + const uint8_t *key, size_t keyBytes) +{ + union { + uint8_t b[SKEIN_512_STATE_BYTES]; + uint64_t w[SKEIN_512_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) { /* is there a key? */ + /* no key: use all zeroes as key for config block */ + bzero(ctx->X, sizeof (ctx->X)); + } else { /* here to pre-process a key */ + + Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X)); + /* do a mini-Init right here */ + /* set output hash bit count = state size */ + ctx->h.hashBitLen = 8 * sizeof (ctx->X); + /* set tweaks: T0 = 0; T1 = KEY type */ + Skein_Start_New_Type(ctx, KEY); + /* zero the initial chaining variables */ + bzero(ctx->X, sizeof (ctx->X)); + (void) Skein_512_Update(ctx, key, keyBytes); /* hash the key */ + /* put result into cfg.b[] */ + (void) Skein_512_Final_Pad(ctx, cfg.b); + /* copy over into ctx->X[] */ + bcopy(cfg.b, ctx->X, sizeof (cfg.b)); +#if SKEIN_NEED_SWAP + { + uint_t i; + /* convert key bytes to context words */ + for (i = 0; i < SKEIN_512_STATE_WORDS; i++) + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* + * build/process the config block, type == CONFIG (could be + * precomputed for each key) + */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx, CFG_FINAL); + + bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = 
Skein_Swap64(hashBitLen); /* hash result length in bits */ + /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + cfg.w[2] = Skein_Swap64(treeInfo); + + Skein_Show_Key(512, &ctx->h, key, keyBytes); + + /* compute the initial chaining values from config block */ + Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx, MSG); + + return (SKEIN_SUCCESS); +} + +/* process the input bytes */ +int +Skein_512_Update(Skein_512_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt) +{ + size_t n; + + /* catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL); + + /* process full blocks, if any */ + if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) { + /* finish up any buffered message data */ + if (ctx->h.bCnt) { + /* # bytes free in buffer b[] */ + n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; + if (n) { + /* check on our logic here */ + Skein_assert(n < msgByteCnt); + bcopy(msg, &ctx->b[ctx->h.bCnt], n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES); + Skein_512_Process_Block(ctx, ctx->b, 1, + SKEIN_512_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* + * now process any remaining full blocks, directly from input + * message data + */ + if (msgByteCnt > SKEIN_512_BLOCK_BYTES) { + /* number of full blocks to process */ + n = (msgByteCnt - 1) / SKEIN_512_BLOCK_BYTES; + Skein_512_Process_Block(ctx, msg, n, + SKEIN_512_BLOCK_BYTES); + msgByteCnt -= n * SKEIN_512_BLOCK_BYTES; + msg += n * SKEIN_512_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES); + bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt); + ctx->h.bCnt += 
msgByteCnt; + } + + return (SKEIN_SUCCESS); +} + +/* finalize the hash computation and output the result */ +int +Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal) +{ + size_t i, n, byteCnt; + uint64_t X[SKEIN_512_STATE_WORDS]; + + /* catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL); + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + /* zero pad b[] if necessary */ + if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) + bzero(&ctx->b[ctx->h.bCnt], + SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); + + /* process the final block */ + Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); + + /* now output the result */ + /* total number of output bytes */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; + + /* run Threefish in "counter mode" to generate output */ + /* zero out b[], so it can hold the counter */ + bzero(ctx->b, sizeof (ctx->b)); + /* keep a local copy of counter mode "key" */ + bcopy(ctx->X, X, sizeof (X)); + for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) { + /* build the counter block */ + uint64_t tmp = Skein_Swap64((uint64_t)i); + bcopy(&tmp, ctx->b, sizeof (tmp)); + Skein_Start_New_Type(ctx, OUT_FINAL); + /* run "counter mode" */ + Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); + /* number of output bytes left to go */ + n = byteCnt - i * SKEIN_512_BLOCK_BYTES; + if (n >= SKEIN_512_BLOCK_BYTES) + n = SKEIN_512_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal + i * SKEIN_512_BLOCK_BYTES, + ctx->X, n); /* "output" the ctr mode bytes */ + Skein_Show_Final(512, &ctx->h, n, + hashVal + i * SKEIN_512_BLOCK_BYTES); + /* restore the counter mode key for next time */ + bcopy(X, ctx->X, sizeof (X)); + } + return (SKEIN_SUCCESS); +} + +/* 1024-bit Skein */ + +/* init the context for a straight hashing operation */ +int +Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen) +{ + union { + uint8_t b[SKEIN1024_STATE_BYTES]; + uint64_t w[SKEIN1024_STATE_WORDS]; + } cfg; /* config block */ + + 
Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 512: + bcopy(SKEIN1024_IV_512, ctx->X, sizeof (ctx->X)); + break; + case 384: + bcopy(SKEIN1024_IV_384, ctx->X, sizeof (ctx->X)); + break; + case 1024: + bcopy(SKEIN1024_IV_1024, ctx->X, sizeof (ctx->X)); + break; +#endif + default: + /* here if there is no precomputed IV value available */ + /* + * build/process the config block, type == CONFIG (could be + * precomputed) + */ + /* set tweaks: T0=0; T1=CFG | FINAL */ + Skein_Start_New_Type(ctx, CFG_FINAL); + + /* set the schema, version */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + /* hash result length in bits */ + cfg.w[1] = Skein_Swap64(hashBitLen); + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + /* zero pad config block */ + bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0])); + + /* compute the initial chaining values from config block */ + /* zero the chaining variables */ + bzero(ctx->X, sizeof (ctx->X)); + Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); + break; + } + + /* + * The chaining vars ctx->X are now initialized for the given + * hashBitLen. 
Set up to process the data message portion of the hash + * (default) + */ + Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */ + + return (SKEIN_SUCCESS); +} + +/* init the context for a MAC and/or tree hash operation */ +/* + * [identical to Skein1024_Init() when keyBytes == 0 && + * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] + */ +int +Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, + const uint8_t *key, size_t keyBytes) +{ + union { + uint8_t b[SKEIN1024_STATE_BYTES]; + uint64_t w[SKEIN1024_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) { /* is there a key? */ + /* no key: use all zeroes as key for config block */ + bzero(ctx->X, sizeof (ctx->X)); + } else { /* here to pre-process a key */ + Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X)); + /* do a mini-Init right here */ + /* set output hash bit count = state size */ + ctx->h.hashBitLen = 8 * sizeof (ctx->X); + /* set tweaks: T0 = 0; T1 = KEY type */ + Skein_Start_New_Type(ctx, KEY); + /* zero the initial chaining variables */ + bzero(ctx->X, sizeof (ctx->X)); + (void) Skein1024_Update(ctx, key, keyBytes); /* hash the key */ + /* put result into cfg.b[] */ + (void) Skein1024_Final_Pad(ctx, cfg.b); + /* copy over into ctx->X[] */ + bcopy(cfg.b, ctx->X, sizeof (cfg.b)); +#if SKEIN_NEED_SWAP + { + uint_t i; + /* convert key bytes to context words */ + for (i = 0; i < SKEIN1024_STATE_WORDS; i++) + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* + * build/process the config block, type == CONFIG (could be + * precomputed for each key) + */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx, CFG_FINAL); + + bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + /* hash result length 
in bits */ + cfg.w[1] = Skein_Swap64(hashBitLen); + /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + cfg.w[2] = Skein_Swap64(treeInfo); + + Skein_Show_Key(1024, &ctx->h, key, keyBytes); + + /* compute the initial chaining values from config block */ + Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx, MSG); + + return (SKEIN_SUCCESS); +} + +/* process the input bytes */ +int +Skein1024_Update(Skein1024_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt) +{ + size_t n; + + /* catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL); + + /* process full blocks, if any */ + if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES) { + /* finish up any buffered message data */ + if (ctx->h.bCnt) { + /* # bytes free in buffer b[] */ + n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt; + if (n) { + /* check on our logic here */ + Skein_assert(n < msgByteCnt); + bcopy(msg, &ctx->b[ctx->h.bCnt], n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES); + Skein1024_Process_Block(ctx, ctx->b, 1, + SKEIN1024_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* + * now process any remaining full blocks, directly from + * input message data + */ + if (msgByteCnt > SKEIN1024_BLOCK_BYTES) { + /* number of full blocks to process */ + n = (msgByteCnt - 1) / SKEIN1024_BLOCK_BYTES; + Skein1024_Process_Block(ctx, msg, n, + SKEIN1024_BLOCK_BYTES); + msgByteCnt -= n * SKEIN1024_BLOCK_BYTES; + msg += n * SKEIN1024_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES); + bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt); + ctx->h.bCnt += 
msgByteCnt; + } + + return (SKEIN_SUCCESS); +} + +/* finalize the hash computation and output the result */ +int +Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal) +{ + size_t i, n, byteCnt; + uint64_t X[SKEIN1024_STATE_WORDS]; + + /* catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL); + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + /* zero pad b[] if necessary */ + if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) + bzero(&ctx->b[ctx->h.bCnt], + SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); + + /* process the final block */ + Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); + + /* now output the result */ + /* total number of output bytes */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; + + /* run Threefish in "counter mode" to generate output */ + /* zero out b[], so it can hold the counter */ + bzero(ctx->b, sizeof (ctx->b)); + /* keep a local copy of counter mode "key" */ + bcopy(ctx->X, X, sizeof (X)); + for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) { + /* build the counter block */ + uint64_t tmp = Skein_Swap64((uint64_t)i); + bcopy(&tmp, ctx->b, sizeof (tmp)); + Skein_Start_New_Type(ctx, OUT_FINAL); + /* run "counter mode" */ + Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); + /* number of output bytes left to go */ + n = byteCnt - i * SKEIN1024_BLOCK_BYTES; + if (n >= SKEIN1024_BLOCK_BYTES) + n = SKEIN1024_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal + i * SKEIN1024_BLOCK_BYTES, + ctx->X, n); /* "output" the ctr mode bytes */ + Skein_Show_Final(1024, &ctx->h, n, + hashVal + i * SKEIN1024_BLOCK_BYTES); + /* restore the counter mode key for next time */ + bcopy(X, ctx->X, sizeof (X)); + } + return (SKEIN_SUCCESS); +} + +/* Functions to support MAC/tree hashing */ +/* (this code is identical for Optimized and Reference versions) */ + +/* finalize the hash computation and output the block, no OUTPUT stage */ +int +Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, uint8_t *hashVal) +{ + /* 
catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL); + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + /* zero pad b[] if necessary */ + if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) + bzero(&ctx->b[ctx->h.bCnt], + SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); + /* process the final block */ + Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); + + /* "output" the state bytes */ + Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN_256_BLOCK_BYTES); + + return (SKEIN_SUCCESS); +} + +/* finalize the hash computation and output the block, no OUTPUT stage */ +int +Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, uint8_t *hashVal) +{ + /* catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL); + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + /* zero pad b[] if necessary */ + if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) + bzero(&ctx->b[ctx->h.bCnt], + SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); + /* process the final block */ + Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); + + /* "output" the state bytes */ + Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN_512_BLOCK_BYTES); + + return (SKEIN_SUCCESS); +} + +/* finalize the hash computation and output the block, no OUTPUT stage */ +int +Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, uint8_t *hashVal) +{ + /* catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL); + + /* tag as the final block */ + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; + /* zero pad b[] if necessary */ + if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) + bzero(&ctx->b[ctx->h.bCnt], + SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); + /* process the final block */ + Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); + + /* "output" the state bytes */ + Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN1024_BLOCK_BYTES); + + return (SKEIN_SUCCESS); +} + +#if SKEIN_TREE_HASH +/* just do the OUTPUT stage */ +int +Skein_256_Output(Skein_256_Ctxt_t 
*ctx, uint8_t *hashVal) +{ + size_t i, n, byteCnt; + uint64_t X[SKEIN_256_STATE_WORDS]; + + /* catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL); + + /* now output the result */ + /* total number of output bytes */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; + + /* run Threefish in "counter mode" to generate output */ + /* zero out b[], so it can hold the counter */ + bzero(ctx->b, sizeof (ctx->b)); + /* keep a local copy of counter mode "key" */ + bcopy(ctx->X, X, sizeof (X)); + for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) { + /* build the counter block */ + uint64_t tmp = Skein_Swap64((uint64_t)i); + bcopy(&tmp, ctx->b, sizeof (tmp)); + Skein_Start_New_Type(ctx, OUT_FINAL); + /* run "counter mode" */ + Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); + /* number of output bytes left to go */ + n = byteCnt - i * SKEIN_256_BLOCK_BYTES; + if (n >= SKEIN_256_BLOCK_BYTES) + n = SKEIN_256_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal + i * SKEIN_256_BLOCK_BYTES, + ctx->X, n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256, &ctx->h, n, + hashVal + i * SKEIN_256_BLOCK_BYTES); + /* restore the counter mode key for next time */ + bcopy(X, ctx->X, sizeof (X)); + } + return (SKEIN_SUCCESS); +} + +/* just do the OUTPUT stage */ +int +Skein_512_Output(Skein_512_Ctxt_t *ctx, uint8_t *hashVal) +{ + size_t i, n, byteCnt; + uint64_t X[SKEIN_512_STATE_WORDS]; + + /* catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL); + + /* now output the result */ + /* total number of output bytes */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; + + /* run Threefish in "counter mode" to generate output */ + /* zero out b[], so it can hold the counter */ + bzero(ctx->b, sizeof (ctx->b)); + /* keep a local copy of counter mode "key" */ + bcopy(ctx->X, X, sizeof (X)); + for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) { + /* build the counter block */ + uint64_t tmp = 
Skein_Swap64((uint64_t)i); + bcopy(&tmp, ctx->b, sizeof (tmp)); + Skein_Start_New_Type(ctx, OUT_FINAL); + /* run "counter mode" */ + Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); + /* number of output bytes left to go */ + n = byteCnt - i * SKEIN_512_BLOCK_BYTES; + if (n >= SKEIN_512_BLOCK_BYTES) + n = SKEIN_512_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal + i * SKEIN_512_BLOCK_BYTES, + ctx->X, n); /* "output" the ctr mode bytes */ + Skein_Show_Final(512, &ctx->h, n, + hashVal + i * SKEIN_512_BLOCK_BYTES); + /* restore the counter mode key for next time */ + bcopy(X, ctx->X, sizeof (X)); + } + return (SKEIN_SUCCESS); +} + +/* OUTPUT stage only: emit (hashBitLen + 7) / 8 bytes; overwrites ctx->b */ +int +Skein1024_Output(Skein1024_Ctxt_t *ctx, uint8_t *hashVal) +{ + size_t i, n, byteCnt; + uint64_t X[SKEIN1024_STATE_WORDS]; + + /* catch uninitialized context */ + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL); + + /* now output the result */ + /* total number of output bytes */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; + + /* run Threefish in "counter mode" to generate output */ + /* zero out b[], so it can hold the counter */ + bzero(ctx->b, sizeof (ctx->b)); + /* keep a local copy of counter mode "key" */ + bcopy(ctx->X, X, sizeof (X)); + for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) { + /* build the counter block */ + uint64_t tmp = Skein_Swap64((uint64_t)i); + bcopy(&tmp, ctx->b, sizeof (tmp)); + Skein_Start_New_Type(ctx, OUT_FINAL); + /* run "counter mode" */ + Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); + /* number of output bytes left to go */ + n = byteCnt - i * SKEIN1024_BLOCK_BYTES; + if (n >= SKEIN1024_BLOCK_BYTES) + n = SKEIN1024_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal + i * SKEIN1024_BLOCK_BYTES, + ctx->X, n); /* "output" the ctr mode bytes */ + Skein_Show_Final(1024, &ctx->h, n, + hashVal + i * SKEIN1024_BLOCK_BYTES); + /* restore the counter mode key for next time */ + bcopy(X, ctx->X, sizeof (X)); + } + return (SKEIN_SUCCESS); +} +#endif
+ +#ifdef _KERNEL +EXPORT_SYMBOL(Skein_512_Init); +EXPORT_SYMBOL(Skein_512_InitExt); +EXPORT_SYMBOL(Skein_512_Update); +EXPORT_SYMBOL(Skein_512_Final); +#endif diff --git a/module/icp/algs/skein/skein_block.c b/module/icp/algs/skein/skein_block.c new file mode 100644 index 000000000..d2e811963 --- /dev/null +++ b/module/icp/algs/skein/skein_block.c @@ -0,0 +1,793 @@ +/* + * Implementation of the Skein block functions. + * Source code author: Doug Whiting, 2008. + * This algorithm and source code is released to the public domain. + * Compile-time switches: + * SKEIN_USE_ASM -- set bits (256/512/1024) to select which + * versions use ASM code for block processing + * [default: use C for all block sizes] + */ +/* Copyright 2013 Doug Whiting. This code is released to the public domain. */ + +#include <sys/skein.h> +#include "skein_impl.h" +#include <sys/isa_defs.h> /* for _ILP32 */ + +#ifndef SKEIN_USE_ASM +#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */ +#endif + +#ifndef SKEIN_LOOP +/* + * The low-level checksum routines use a lot of stack space. On systems where + * small stack frames are enforced (like 32-bit kernel builds), do not unroll + * checksum calculations to save stack space. + * + * Even with no loops unrolled, we still can exceed the 1k stack frame limit + * in Skein1024_Process_Block() (it hits 1272 bytes on ARM32). We can + * safely ignore it though, since the checksum functions will be called + * from a worker thread that won't be using much stack. That's why we have + * the #pragma here to ignore the warning. + */ +#if defined(_ILP32) || defined(__powerpc) /* Assume small stack */ +#pragma GCC diagnostic ignored "-Wframe-larger-than=" +/* + * We're running on 32-bit, don't unroll loops to save stack frame space + * + * Due to the way the calculations on SKEIN_LOOP are done in + * Skein_*_Process_Block(), a value of 111 disables unrolling loops + * in any of those functions.
+ */ +#define SKEIN_LOOP 111 +#else +/* We're compiling with large stacks */ +#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */ +#endif +#endif + +/* some useful definitions for code here */ +#define BLK_BITS (WCNT*64) +#define KW_TWK_BASE (0) +#define KW_KEY_BASE (3) +#define ks (kw + KW_KEY_BASE) +#define ts (kw + KW_TWK_BASE) + +/* no debugging in Illumos version */ +#define DebugSaveTweak(ctx) + +/* Skein_256 */ +#if !(SKEIN_USE_ASM & 256) + +void +Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr, + size_t blkCnt, size_t byteCntAdd) +{ /* do it in C */ + enum { + WCNT = SKEIN_256_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN_256_ROUNDS_TOTAL / 8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_256 (((SKEIN_LOOP) / 100) % 10) +#else +#define SKEIN_UNROLL_256 (0) +#endif + +#if SKEIN_UNROLL_256 +#if (RCNT % SKEIN_UNROLL_256) +#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */ +#endif + size_t r; + /* key schedule words : chaining vars + tweak + "rotation" */ + uint64_t kw[WCNT + 4 + RCNT * 2]; +#else + uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */ +#endif + /* local copy of context vars, for speed */ + uint64_t X0, X1, X2, X3; + uint64_t w[WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + /* use for debugging (help compiler put Xn in registers) */ + const uint64_t *Xptr[4]; + Xptr[0] = &X0; + Xptr[1] = &X1; + Xptr[2] = &X2; + Xptr[3] = &X3; +#endif + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* + * this implementation only supports 2**64 input bytes + * (no carry out here) + */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[0] = ctx->X[0]; + ks[1] = ctx->X[1]; + ks[2] = ctx->X[2]; + ks[3] = ctx->X[3]; + ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + /* get input block in little-endian format */ + Skein_Get64_LSB_First(w, blkPtr, WCNT); + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); + + X0 = w[0] + ks[0]; /* do the first full key injection */ + X1 = w[1] + ks[1] + ts[0]; + X2 = w[2] + ks[2] + ts[1]; + X3 = w[3] + ks[3]; + + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, + Xptr); /* show starting state values */ + + blkPtr += SKEIN_256_BLOCK_BYTES; + + /* run the rounds */ + +#define Round256(p0, p1, p2, p3, ROT, rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \ + X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \ + +#if SKEIN_UNROLL_256 == 0 +#define R256(p0, p1, p2, p3, ROT, rNum) /* fully unrolled */ \ + Round256(p0, p1, p2, p3, ROT, rNum) \ + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr); + +#define I256(R) \ + X0 += ks[((R) + 1) % 5]; /* inject the key schedule value */ \ + X1 += ks[((R) + 2) % 5] + ts[((R) + 1) % 3]; \ + X2 += ks[((R) + 3) % 5] + ts[((R) + 2) % 3]; \ + X3 += ks[((R) + 4) % 5] + (R) + 1; \ + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); +#else /* looping version */ +#define R256(p0, p1, p2, p3, ROT, rNum) \ + Round256(p0, p1, p2, p3, ROT, rNum) \ + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr); + +#define I256(R) \ + X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \ + X1 += ks[r + (R) + 1] + ts[r + (R) + 0]; \ + X2 += ks[r + (R) + 2] + ts[r + (R) + 1]; \ + X3 += ks[r + (R) + 3] + r + (R); \ + ks[r + (R) + 4] = ks[r + (R) - 1]; /* 
rotate key schedule */ \ + ts[r + (R) + 2] = ts[r + (R) - 1]; \ + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); + + /* loop thru it */ + for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256) +#endif + { +#define R256_8_rounds(R) \ + R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1); \ + R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2); \ + R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3); \ + R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4); \ + I256(2 * (R)); \ + R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5); \ + R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6); \ + R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7); \ + R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8); \ + I256(2 * (R) + 1); + + R256_8_rounds(0); + +#define R256_Unroll_R(NN) \ + ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL / 8 > (NN)) || \ + (SKEIN_UNROLL_256 > (NN))) + +#if R256_Unroll_R(1) + R256_8_rounds(1); +#endif +#if R256_Unroll_R(2) + R256_8_rounds(2); +#endif +#if R256_Unroll_R(3) + R256_8_rounds(3); +#endif +#if R256_Unroll_R(4) + R256_8_rounds(4); +#endif +#if R256_Unroll_R(5) + R256_8_rounds(5); +#endif +#if R256_Unroll_R(6) + R256_8_rounds(6); +#endif +#if R256_Unroll_R(7) + R256_8_rounds(7); +#endif +#if R256_Unroll_R(8) + R256_8_rounds(8); +#endif +#if R256_Unroll_R(9) + R256_8_rounds(9); +#endif +#if R256_Unroll_R(10) + R256_8_rounds(10); +#endif +#if R256_Unroll_R(11) + R256_8_rounds(11); +#endif +#if R256_Unroll_R(12) + R256_8_rounds(12); +#endif +#if R256_Unroll_R(13) + R256_8_rounds(13); +#endif +#if R256_Unroll_R(14) + R256_8_rounds(14); +#endif +#if (SKEIN_UNROLL_256 > 14) +#error "need more unrolling in Skein_256_Process_Block" +#endif + } + /* + * do the final "feedforward" xor, update context chaining vars + */ + ctx->X[0] = X0 ^ w[0]; + ctx->X[1] = X1 ^ w[1]; + ctx->X[2] = X2 ^ w[2]; + ctx->X[3] = X3 ^ w[3]; + + Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X); + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + } + while (--blkCnt); + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; +} + +#if defined(SKEIN_CODE_SIZE) || 
defined(SKEIN_PERF) +size_t +Skein_256_Process_Block_CodeSize(void) +{ + return ((uint8_t *)Skein_256_Process_Block_CodeSize) - + ((uint8_t *)Skein_256_Process_Block); +} + +uint_t +Skein_256_Unroll_Cnt(void) +{ + return (SKEIN_UNROLL_256); +} +#endif +#endif + +/* Skein_512 */ +#if !(SKEIN_USE_ASM & 512) +void +Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr, + size_t blkCnt, size_t byteCntAdd) +{ /* do it in C */ + enum { + WCNT = SKEIN_512_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN_512_ROUNDS_TOTAL / 8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_512 (((SKEIN_LOOP) / 10) % 10) +#else +#define SKEIN_UNROLL_512 (0) +#endif + +#if SKEIN_UNROLL_512 +#if (RCNT % SKEIN_UNROLL_512) +#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ +#endif + size_t r; + /* key schedule words : chaining vars + tweak + "rotation" */ + uint64_t kw[WCNT + 4 + RCNT * 2]; +#else + uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */ +#endif + /* local copy of vars, for speed */ + uint64_t X0, X1, X2, X3, X4, X5, X6, X7; + uint64_t w[WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + /* use for debugging (help compiler put Xn in registers) */ + const uint64_t *Xptr[8]; + Xptr[0] = &X0; + Xptr[1] = &X1; + Xptr[2] = &X2; + Xptr[3] = &X3; + Xptr[4] = &X4; + Xptr[5] = &X5; + Xptr[6] = &X6; + Xptr[7] = &X7; +#endif + + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* + * this implementation only supports 2**64 input bytes + * (no carry out here) + */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[0] = ctx->X[0]; + ks[1] = ctx->X[1]; + ks[2] = ctx->X[2]; + ks[3] = ctx->X[3]; + ks[4] = ctx->X[4]; + ks[5] = ctx->X[5]; + ks[6] = ctx->X[6]; + ks[7] = ctx->X[7]; + ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ + ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + /* get input block in little-endian format */ + Skein_Get64_LSB_First(w, blkPtr, WCNT); + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); + + X0 = w[0] + ks[0]; /* do the first full key injection */ + X1 = w[1] + ks[1]; + X2 = w[2] + ks[2]; + X3 = w[3] + ks[3]; + X4 = w[4] + ks[4]; + X5 = w[5] + ks[5] + ts[0]; + X6 = w[6] + ks[6] + ts[1]; + X7 = w[7] + ks[7]; + + blkPtr += SKEIN_512_BLOCK_BYTES; + + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, + Xptr); + /* run the rounds */ +#define Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\ + X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\ + X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\ + X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6; + +#if SKEIN_UNROLL_512 == 0 +#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) /* unrolled */ \ + Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \ + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr); + +#define I512(R) \ + X0 += ks[((R) + 1) % 9]; /* inject the key schedule value */\ + X1 += ks[((R) + 2) % 9]; \ + X2 += ks[((R) + 3) % 9]; \ + X3 += ks[((R) + 4) % 9]; \ + X4 += ks[((R) + 5) % 9]; \ + X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3]; \ + X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3]; \ + X7 += ks[((R) + 8) % 9] + (R) + 1; \ + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 
SKEIN_RND_KEY_INJECT, Xptr); +#else /* looping version */ +#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \ + Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \ + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr); + +#define I512(R) \ + X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \ + X1 += ks[r + (R) + 1]; \ + X2 += ks[r + (R) + 2]; \ + X3 += ks[r + (R) + 3]; \ + X4 += ks[r + (R) + 4]; \ + X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \ + X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \ + X7 += ks[r + (R) + 7] + r + (R); \ + ks[r + (R)+8] = ks[r + (R) - 1]; /* rotate key schedule */\ + ts[r + (R)+2] = ts[r + (R) - 1]; \ + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); + + /* loop thru it */ + for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512) +#endif /* end of looped code definitions */ + { +#define R512_8_rounds(R) /* do 8 full rounds */ \ + R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1); \ + R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2); \ + R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3); \ + R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4); \ + I512(2 * (R)); \ + R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5); \ + R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6); \ + R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7); \ + R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8); \ + I512(2*(R) + 1); /* and key injection */ + + R512_8_rounds(0); + +#define R512_Unroll_R(NN) \ + ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL / 8 > (NN)) || \ + (SKEIN_UNROLL_512 > (NN))) + +#if R512_Unroll_R(1) + R512_8_rounds(1); +#endif +#if R512_Unroll_R(2) + R512_8_rounds(2); +#endif +#if R512_Unroll_R(3) + R512_8_rounds(3); +#endif +#if R512_Unroll_R(4) + R512_8_rounds(4); +#endif +#if R512_Unroll_R(5) + R512_8_rounds(5); +#endif +#if R512_Unroll_R(6) + R512_8_rounds(6); +#endif +#if R512_Unroll_R(7) + R512_8_rounds(7); +#endif +#if R512_Unroll_R(8) + R512_8_rounds(8); +#endif +#if R512_Unroll_R(9) + 
R512_8_rounds(9); +#endif +#if R512_Unroll_R(10) + R512_8_rounds(10); +#endif +#if R512_Unroll_R(11) + R512_8_rounds(11); +#endif +#if R512_Unroll_R(12) + R512_8_rounds(12); +#endif +#if R512_Unroll_R(13) + R512_8_rounds(13); +#endif +#if R512_Unroll_R(14) + R512_8_rounds(14); +#endif +#if (SKEIN_UNROLL_512 > 14) +#error "need more unrolling in Skein_512_Process_Block" +#endif + } + + /* + * do the final "feedforward" xor, update context chaining vars + */ + ctx->X[0] = X0 ^ w[0]; + ctx->X[1] = X1 ^ w[1]; + ctx->X[2] = X2 ^ w[2]; + ctx->X[3] = X3 ^ w[3]; + ctx->X[4] = X4 ^ w[4]; + ctx->X[5] = X5 ^ w[5]; + ctx->X[6] = X6 ^ w[6]; + ctx->X[7] = X7 ^ w[7]; + Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X); + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + } + while (--blkCnt); + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; +} + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t +Skein_512_Process_Block_CodeSize(void) +{ + return ((uint8_t *)Skein_512_Process_Block_CodeSize) - + ((uint8_t *)Skein_512_Process_Block); +} + +uint_t +Skein_512_Unroll_Cnt(void) +{ + return (SKEIN_UNROLL_512); +} +#endif +#endif + +/* Skein1024 */ +#if !(SKEIN_USE_ASM & 1024) +void +Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr, + size_t blkCnt, size_t byteCntAdd) +{ + /* do it in C, always looping (unrolled is bigger AND slower!) 
*/ + enum { + WCNT = SKEIN1024_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN1024_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10) +#else +#define SKEIN_UNROLL_1024 (0) +#endif + +#if (SKEIN_UNROLL_1024 != 0) +#if (RCNT % SKEIN_UNROLL_1024) +#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */ +#endif + size_t r; + /* key schedule words : chaining vars + tweak + "rotation" */ + uint64_t kw[WCNT + 4 + RCNT * 2]; +#else + uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */ +#endif + + /* local copy of vars, for speed */ + uint64_t X00, X01, X02, X03, X04, X05, X06, X07, X08, X09, X10, X11, + X12, X13, X14, X15; + uint64_t w[WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + /* use for debugging (help compiler put Xn in registers) */ + const uint64_t *Xptr[16]; + Xptr[0] = &X00; + Xptr[1] = &X01; + Xptr[2] = &X02; + Xptr[3] = &X03; + Xptr[4] = &X04; + Xptr[5] = &X05; + Xptr[6] = &X06; + Xptr[7] = &X07; + Xptr[8] = &X08; + Xptr[9] = &X09; + Xptr[10] = &X10; + Xptr[11] = &X11; + Xptr[12] = &X12; + Xptr[13] = &X13; + Xptr[14] = &X14; + Xptr[15] = &X15; +#endif + + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* + * this implementation only supports 2**64 input bytes + * (no carry out here) + */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[0] = ctx->X[0]; + ks[1] = ctx->X[1]; + ks[2] = ctx->X[2]; + ks[3] = ctx->X[3]; + ks[4] = ctx->X[4]; + ks[5] = ctx->X[5]; + ks[6] = ctx->X[6]; + ks[7] = ctx->X[7]; + ks[8] = ctx->X[8]; + ks[9] = ctx->X[9]; + ks[10] = ctx->X[10]; + ks[11] = ctx->X[11]; + ks[12] = ctx->X[12]; + ks[13] = ctx->X[13]; + ks[14] = ctx->X[14]; + ks[15] = ctx->X[15]; + ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ + ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ + ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^ + ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + /* get input block in little-endian format */ + Skein_Get64_LSB_First(w, blkPtr, WCNT); + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); + + X00 = w[0] + ks[0]; /* do the first full key injection */ + X01 = w[1] + ks[1]; + X02 = w[2] + ks[2]; + X03 = w[3] + ks[3]; + X04 = w[4] + ks[4]; + X05 = w[5] + ks[5]; + X06 = w[6] + ks[6]; + X07 = w[7] + ks[7]; + X08 = w[8] + ks[8]; + X09 = w[9] + ks[9]; + X10 = w[10] + ks[10]; + X11 = w[11] + ks[11]; + X12 = w[12] + ks[12]; + X13 = w[13] + ks[13] + ts[0]; + X14 = w[14] + ks[14] + ts[1]; + X15 = w[15] + ks[15]; + + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, + Xptr); + +#define Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \ + pD, pE, pF, ROT, rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\ + X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\ + X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\ + X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6;\ + X##p8 += X##p9; X##p9 = RotL_64(X##p9, ROT##_4); X##p9 ^= X##p8;\ + X##pA += X##pB; X##pB = RotL_64(X##pB, ROT##_5); X##pB ^= X##pA;\ + X##pC += X##pD; 
X##pD = RotL_64(X##pD, ROT##_6); X##pD ^= X##pC;\ + X##pE += X##pF; X##pF = RotL_64(X##pF, ROT##_7); X##pF ^= X##pE; + +#if SKEIN_UNROLL_1024 == 0 +#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, \ + pE, pF, ROT, rn) \ + Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \ + pD, pE, pF, ROT, rn) \ + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rn, Xptr); + +#define I1024(R) \ + X00 += ks[((R) + 1) % 17]; /* inject the key schedule value */\ + X01 += ks[((R) + 2) % 17]; \ + X02 += ks[((R) + 3) % 17]; \ + X03 += ks[((R) + 4) % 17]; \ + X04 += ks[((R) + 5) % 17]; \ + X05 += ks[((R) + 6) % 17]; \ + X06 += ks[((R) + 7) % 17]; \ + X07 += ks[((R) + 8) % 17]; \ + X08 += ks[((R) + 9) % 17]; \ + X09 += ks[((R) + 10) % 17]; \ + X10 += ks[((R) + 11) % 17]; \ + X11 += ks[((R) + 12) % 17]; \ + X12 += ks[((R) + 13) % 17]; \ + X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3]; \ + X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3]; \ + X15 += ks[((R) + 16) % 17] + (R) +1; \ + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); +#else /* looping version */ +#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, \ + pE, pF, ROT, rn) \ + Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \ + pD, pE, pF, ROT, rn) \ + Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, Xptr); + +#define I1024(R) \ + X00 += ks[r + (R) + 0]; /* inject the key schedule value */ \ + X01 += ks[r + (R) + 1]; \ + X02 += ks[r + (R) + 2]; \ + X03 += ks[r + (R) + 3]; \ + X04 += ks[r + (R) + 4]; \ + X05 += ks[r + (R) + 5]; \ + X06 += ks[r + (R) + 6]; \ + X07 += ks[r + (R) + 7]; \ + X08 += ks[r + (R) + 8]; \ + X09 += ks[r + (R) + 9]; \ + X10 += ks[r + (R) + 10]; \ + X11 += ks[r + (R) + 11]; \ + X12 += ks[r + (R) + 12]; \ + X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \ + X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \ + X15 += ks[r + (R) + 15] + r + (R); \ + ks[r + (R) + 16] = ks[r + (R) - 1]; /* rotate key schedule */\ + ts[r + (R) + 2] = ts[r + (R) - 1]; \ + 
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); + + /* loop thru it */ + for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024) +#endif + { +#define R1024_8_rounds(R) /* do 8 full rounds */ \ + R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, \ + 14, 15, R1024_0, 8 * (R) + 1); \ + R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, \ + 08, 01, R1024_1, 8 * (R) + 2); \ + R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, \ + 10, 09, R1024_2, 8 * (R) + 3); \ + R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, \ + 12, 07, R1024_3, 8 * (R) + 4); \ + I1024(2 * (R)); \ + R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, \ + 14, 15, R1024_4, 8 * (R) + 5); \ + R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, \ + 08, 01, R1024_5, 8 * (R) + 6); \ + R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, \ + 10, 09, R1024_6, 8 * (R) + 7); \ + R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, \ + 12, 07, R1024_7, 8 * (R) + 8); \ + I1024(2 * (R) + 1); + + R1024_8_rounds(0); + +#define R1024_Unroll_R(NN) \ + ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || \ + (SKEIN_UNROLL_1024 > (NN))) + +#if R1024_Unroll_R(1) + R1024_8_rounds(1); +#endif +#if R1024_Unroll_R(2) + R1024_8_rounds(2); +#endif +#if R1024_Unroll_R(3) + R1024_8_rounds(3); +#endif +#if R1024_Unroll_R(4) + R1024_8_rounds(4); +#endif +#if R1024_Unroll_R(5) + R1024_8_rounds(5); +#endif +#if R1024_Unroll_R(6) + R1024_8_rounds(6); +#endif +#if R1024_Unroll_R(7) + R1024_8_rounds(7); +#endif +#if R1024_Unroll_R(8) + R1024_8_rounds(8); +#endif +#if R1024_Unroll_R(9) + R1024_8_rounds(9); +#endif +#if R1024_Unroll_R(10) + R1024_8_rounds(10); +#endif +#if R1024_Unroll_R(11) + R1024_8_rounds(11); +#endif +#if R1024_Unroll_R(12) + R1024_8_rounds(12); +#endif +#if R1024_Unroll_R(13) + R1024_8_rounds(13); +#endif +#if R1024_Unroll_R(14) + R1024_8_rounds(14); +#endif +#if (SKEIN_UNROLL_1024 > 14) +#error "need 
more unrolling in Skein_1024_Process_Block" +#endif + } + /* + * do the final "feedforward" xor, update context chaining vars + */ + + ctx->X[0] = X00 ^ w[0]; + ctx->X[1] = X01 ^ w[1]; + ctx->X[2] = X02 ^ w[2]; + ctx->X[3] = X03 ^ w[3]; + ctx->X[4] = X04 ^ w[4]; + ctx->X[5] = X05 ^ w[5]; + ctx->X[6] = X06 ^ w[6]; + ctx->X[7] = X07 ^ w[7]; + ctx->X[8] = X08 ^ w[8]; + ctx->X[9] = X09 ^ w[9]; + ctx->X[10] = X10 ^ w[10]; + ctx->X[11] = X11 ^ w[11]; + ctx->X[12] = X12 ^ w[12]; + ctx->X[13] = X13 ^ w[13]; + ctx->X[14] = X14 ^ w[14]; + ctx->X[15] = X15 ^ w[15]; + + Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X); + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + blkPtr += SKEIN1024_BLOCK_BYTES; + } while (--blkCnt); + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; +} + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t +Skein1024_Process_Block_CodeSize(void) +{ + return ((uint8_t *)Skein1024_Process_Block_CodeSize) - + ((uint8_t *)Skein1024_Process_Block); +} + +uint_t +Skein1024_Unroll_Cnt(void) +{ + return (SKEIN_UNROLL_1024); +} +#endif +#endif diff --git a/module/icp/algs/skein/skein_impl.h b/module/icp/algs/skein/skein_impl.h new file mode 100644 index 000000000..e83a06971 --- /dev/null +++ b/module/icp/algs/skein/skein_impl.h @@ -0,0 +1,289 @@ +/* + * Internal definitions for Skein hashing. + * Source code author: Doug Whiting, 2008. + * This algorithm and source code is released to the public domain. + * + * The following compile-time switches may be defined to control some + * tradeoffs between speed, code size, error checking, and security. + * + * The "default" note explains what happens when the switch is not defined. + * + * SKEIN_DEBUG -- make callouts from inside Skein code + * to examine/display intermediate values. + * [default: no callouts (no overhead)] + * + * SKEIN_ERR_CHECK -- how error checking is handled inside Skein + * code. If not defined, most error checking + * is disabled (for performance). 
Otherwise, + * the switch value is interpreted as: + * 0: use assert() to flag errors + * 1: return SKEIN_FAIL to flag errors + */ +/* Copyright 2013 Doug Whiting. This code is released to the public domain. */ + +#ifndef _SKEIN_IMPL_H_ +#define _SKEIN_IMPL_H_ + +#include +#include "skein_impl.h" +#include "skein_port.h" + +/* determine where we can get bcopy/bzero declarations */ +#ifdef _KERNEL +#include +#else +#include +#endif + +/* + * "Internal" Skein definitions + * -- not needed for sequential hashing API, but will be + * helpful for other uses of Skein (e.g., tree hash mode). + * -- included here so that they can be shared between + * reference and optimized code. + */ + +/* tweak word T[1]: bit field starting positions */ +/* offset 64 because it's the second word */ +#define SKEIN_T1_BIT(BIT) ((BIT) - 64) + +/* bits 112..118: level in hash tree */ +#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) +/* bit 119: partial final input byte */ +#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) +/* bits 120..125: type field */ +#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) +/* bits 126: first block flag */ +#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) +/* bit 127: final block flag */ +#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) + +/* tweak word T[1]: flag bit definition(s) */ +#define SKEIN_T1_FLAG_FIRST (((uint64_t)1) << SKEIN_T1_POS_FIRST) +#define SKEIN_T1_FLAG_FINAL (((uint64_t)1) << SKEIN_T1_POS_FINAL) +#define SKEIN_T1_FLAG_BIT_PAD (((uint64_t)1) << SKEIN_T1_POS_BIT_PAD) + +/* tweak word T[1]: tree level bit field mask */ +#define SKEIN_T1_TREE_LVL_MASK (((uint64_t)0x7F) << SKEIN_T1_POS_TREE_LVL) +#define SKEIN_T1_TREE_LEVEL(n) (((uint64_t)(n)) << SKEIN_T1_POS_TREE_LVL) + +/* tweak word T[1]: block type field */ +#define SKEIN_BLK_TYPE_KEY (0) /* key, for MAC and KDF */ +#define SKEIN_BLK_TYPE_CFG (4) /* configuration block */ +#define SKEIN_BLK_TYPE_PERS (8) /* personalization string */ +#define SKEIN_BLK_TYPE_PK (12) /* public key (for signature hashing) 
*/ +#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */ +#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */ +#define SKEIN_BLK_TYPE_MSG (48) /* message processing */ +#define SKEIN_BLK_TYPE_OUT (63) /* output stage */ +#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */ + +#define SKEIN_T1_BLK_TYPE(T) \ + (((uint64_t)(SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE) +/* key, for MAC and KDF */ +#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) +/* configuration block */ +#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) +/* personalization string */ +#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) +/* public key (for digital signature hashing) */ +#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) +/* key identifier for KDF */ +#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) +/* nonce for PRNG */ +#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE) +/* message processing */ +#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) +/* output stage */ +#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) +/* field bit mask */ +#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) + +#define SKEIN_T1_BLK_TYPE_CFG_FINAL \ + (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL) +#define SKEIN_T1_BLK_TYPE_OUT_FINAL \ + (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL) + +#define SKEIN_VERSION (1) + +#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */ +#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian) */ +#endif + +#define SKEIN_MK_64(hi32, lo32) ((lo32) + (((uint64_t)(hi32)) << 32)) +#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION, SKEIN_ID_STRING_LE) +#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA, 0xA9FC1A22) + +#define SKEIN_CFG_STR_LEN (4*8) + +/* bit field definitions in config block treeInfo word */ +#define SKEIN_CFG_TREE_LEAF_SIZE_POS (0) +#define SKEIN_CFG_TREE_NODE_SIZE_POS (8) +#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16) + +#define SKEIN_CFG_TREE_LEAF_SIZE_MSK \ + (((uint64_t)0xFF) << 
SKEIN_CFG_TREE_LEAF_SIZE_POS) +#define SKEIN_CFG_TREE_NODE_SIZE_MSK \ + (((uint64_t)0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS) +#define SKEIN_CFG_TREE_MAX_LEVEL_MSK \ + (((uint64_t)0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS) + +#define SKEIN_CFG_TREE_INFO(leaf, node, maxLvl) \ + ((((uint64_t)(leaf)) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \ + (((uint64_t)(node)) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \ + (((uint64_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS)) + +/* use as treeInfo in InitExt() call for sequential processing */ +#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0, 0, 0) + +/* + * Skein macros for getting/setting tweak words, etc. + * These are useful for partial input bytes, hash tree init/update, etc. + */ +#define Skein_Get_Tweak(ctxPtr, TWK_NUM) ((ctxPtr)->h.T[TWK_NUM]) +#define Skein_Set_Tweak(ctxPtr, TWK_NUM, tVal) \ + do { \ + (ctxPtr)->h.T[TWK_NUM] = (tVal); \ + _NOTE(CONSTCOND) \ + } while (0) + +#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr, 0) +#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr, 1) +#define Skein_Set_T0(ctxPtr, T0) Skein_Set_Tweak(ctxPtr, 0, T0) +#define Skein_Set_T1(ctxPtr, T1) Skein_Set_Tweak(ctxPtr, 1, T1) + +/* set both tweak words at once */ +#define Skein_Set_T0_T1(ctxPtr, T0, T1) \ + do { \ + Skein_Set_T0(ctxPtr, (T0)); \ + Skein_Set_T1(ctxPtr, (T1)); \ + _NOTE(CONSTCOND) \ + } while (0) + +#define Skein_Set_Type(ctxPtr, BLK_TYPE) \ + Skein_Set_T1(ctxPtr, SKEIN_T1_BLK_TYPE_##BLK_TYPE) + +/* + * set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; + */ +#define Skein_Start_New_Type(ctxPtr, BLK_TYPE) \ + do { \ + Skein_Set_T0_T1(ctxPtr, 0, SKEIN_T1_FLAG_FIRST | \ + SKEIN_T1_BLK_TYPE_ ## BLK_TYPE); \ + (ctxPtr)->h.bCnt = 0; \ + _NOTE(CONSTCOND) \ + } while (0) + +#define Skein_Clear_First_Flag(hdr) \ + do { \ + (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; \ + _NOTE(CONSTCOND) \ + } while (0) +#define Skein_Set_Bit_Pad_Flag(hdr) \ + do { \ + (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; \ + _NOTE(CONSTCOND) \ + } 
while (0) + +#define Skein_Set_Tree_Level(hdr, height) \ + do { \ + (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height); \ + _NOTE(CONSTCOND) \ + } while (0) + +/* + * "Internal" Skein definitions for debugging and error checking + * Note: in Illumos we always disable debugging features. + */ +#define Skein_Show_Block(bits, ctx, X, blkPtr, wPtr, ksEvenPtr, ksOddPtr) +#define Skein_Show_Round(bits, ctx, r, X) +#define Skein_Show_R_Ptr(bits, ctx, r, X_ptr) +#define Skein_Show_Final(bits, ctx, cnt, outPtr) +#define Skein_Show_Key(bits, ctx, key, keyBytes) + +/* run-time checks (e.g., bad params, uninitialized context)? */ +#ifndef SKEIN_ERR_CHECK +/* default: ignore all Asserts, for performance */ +#define Skein_Assert(x, retCode) +#define Skein_assert(x) +#elif defined(SKEIN_ASSERT) +#include +#define Skein_Assert(x, retCode) ASSERT(x) +#define Skein_assert(x) ASSERT(x) +#else +#include +/* caller error */ +#define Skein_Assert(x, retCode) \ + do { \ + if (!(x)) \ + return (retCode); \ + _NOTE(CONSTCOND) \ + } while (0) +/* internal error */ +#define Skein_assert(x) ASSERT(x) +#endif + +/* + * Skein block function constants (shared across Ref and Opt code) + */ +enum { + /* Skein_256 round rotation constants */ + R_256_0_0 = 14, R_256_0_1 = 16, + R_256_1_0 = 52, R_256_1_1 = 57, + R_256_2_0 = 23, R_256_2_1 = 40, + R_256_3_0 = 5, R_256_3_1 = 37, + R_256_4_0 = 25, R_256_4_1 = 33, + R_256_5_0 = 46, R_256_5_1 = 12, + R_256_6_0 = 58, R_256_6_1 = 22, + R_256_7_0 = 32, R_256_7_1 = 32, + + /* Skein_512 round rotation constants */ + R_512_0_0 = 46, R_512_0_1 = 36, R_512_0_2 = 19, R_512_0_3 = 37, + R_512_1_0 = 33, R_512_1_1 = 27, R_512_1_2 = 14, R_512_1_3 = 42, + R_512_2_0 = 17, R_512_2_1 = 49, R_512_2_2 = 36, R_512_2_3 = 39, + R_512_3_0 = 44, R_512_3_1 = 9, R_512_3_2 = 54, R_512_3_3 = 56, + R_512_4_0 = 39, R_512_4_1 = 30, R_512_4_2 = 34, R_512_4_3 = 24, + R_512_5_0 = 13, R_512_5_1 = 50, R_512_5_2 = 10, R_512_5_3 = 17, + R_512_6_0 = 25, R_512_6_1 = 29, R_512_6_2 = 39, R_512_6_3 = 43, + 
R_512_7_0 = 8, R_512_7_1 = 35, R_512_7_2 = 56, R_512_7_3 = 22, + + /* Skein1024 round rotation constants */ + R1024_0_0 = 24, R1024_0_1 = 13, R1024_0_2 = 8, R1024_0_3 = + 47, R1024_0_4 = 8, R1024_0_5 = 17, R1024_0_6 = 22, R1024_0_7 = 37, + R1024_1_0 = 38, R1024_1_1 = 19, R1024_1_2 = 10, R1024_1_3 = + 55, R1024_1_4 = 49, R1024_1_5 = 18, R1024_1_6 = 23, R1024_1_7 = 52, + R1024_2_0 = 33, R1024_2_1 = 4, R1024_2_2 = 51, R1024_2_3 = + 13, R1024_2_4 = 34, R1024_2_5 = 41, R1024_2_6 = 59, R1024_2_7 = 17, + R1024_3_0 = 5, R1024_3_1 = 20, R1024_3_2 = 48, R1024_3_3 = + 41, R1024_3_4 = 47, R1024_3_5 = 28, R1024_3_6 = 16, R1024_3_7 = 25, + R1024_4_0 = 41, R1024_4_1 = 9, R1024_4_2 = 37, R1024_4_3 = + 31, R1024_4_4 = 12, R1024_4_5 = 47, R1024_4_6 = 44, R1024_4_7 = 30, + R1024_5_0 = 16, R1024_5_1 = 34, R1024_5_2 = 56, R1024_5_3 = + 51, R1024_5_4 = 4, R1024_5_5 = 53, R1024_5_6 = 42, R1024_5_7 = 41, + R1024_6_0 = 31, R1024_6_1 = 44, R1024_6_2 = 47, R1024_6_3 = + 46, R1024_6_4 = 19, R1024_6_5 = 42, R1024_6_6 = 44, R1024_6_7 = 25, + R1024_7_0 = 9, R1024_7_1 = 48, R1024_7_2 = 35, R1024_7_3 = + 52, R1024_7_4 = 23, R1024_7_5 = 31, R1024_7_6 = 37, R1024_7_7 = 20 +}; + +/* number of rounds for the different block sizes */ +#define SKEIN_256_ROUNDS_TOTAL (72) +#define SKEIN_512_ROUNDS_TOTAL (72) +#define SKEIN1024_ROUNDS_TOTAL (80) + + +extern const uint64_t SKEIN_256_IV_128[]; +extern const uint64_t SKEIN_256_IV_160[]; +extern const uint64_t SKEIN_256_IV_224[]; +extern const uint64_t SKEIN_256_IV_256[]; +extern const uint64_t SKEIN_512_IV_128[]; +extern const uint64_t SKEIN_512_IV_160[]; +extern const uint64_t SKEIN_512_IV_224[]; +extern const uint64_t SKEIN_512_IV_256[]; +extern const uint64_t SKEIN_512_IV_384[]; +extern const uint64_t SKEIN_512_IV_512[]; +extern const uint64_t SKEIN1024_IV_384[]; +extern const uint64_t SKEIN1024_IV_512[]; +extern const uint64_t SKEIN1024_IV_1024[]; + +#endif /* _SKEIN_IMPL_H_ */ diff --git a/module/icp/algs/skein/skein_iv.c 
b/module/icp/algs/skein/skein_iv.c new file mode 100644 index 000000000..140d38f76 --- /dev/null +++ b/module/icp/algs/skein/skein_iv.c @@ -0,0 +1,185 @@ +/* + * Pre-computed Skein IVs + * + * NOTE: these values are not "magic" constants, but + * are generated using the Threefish block function. + * They are pre-computed here only for speed; i.e., to + * avoid the need for a Threefish call during Init(). + * + * The IV for any fixed hash length may be pre-computed. + * Only the most common values are included here. + */ +/* Copyright 2013 Doug Whiting. This code is released to the public domain. */ +/* + * Illumos implementation note: these constants are for Skein v1.3 as per: + * http://www.skein-hash.info/sites/default/files/skein1.3.pdf + */ + +#include /* get Skein macros and types */ +#include "skein_impl.h" /* get internal definitions */ + +#define MK_64 SKEIN_MK_64 + +/* blkSize = 256 bits. hashSize = 128 bits */ +const uint64_t SKEIN_256_IV_128[] = { + MK_64(0xE1111906, 0x964D7260), + MK_64(0x883DAAA7, 0x7C8D811C), + MK_64(0x10080DF4, 0x91960F7A), + MK_64(0xCCF7DDE5, 0xB45BC1C2) +}; + +/* blkSize = 256 bits. hashSize = 160 bits */ +const uint64_t SKEIN_256_IV_160[] = { + MK_64(0x14202314, 0x72825E98), + MK_64(0x2AC4E9A2, 0x5A77E590), + MK_64(0xD47A5856, 0x8838D63E), + MK_64(0x2DD2E496, 0x8586AB7D) +}; + +/* blkSize = 256 bits. hashSize = 224 bits */ +const uint64_t SKEIN_256_IV_224[] = { + MK_64(0xC6098A8C, 0x9AE5EA0B), + MK_64(0x876D5686, 0x08C5191C), + MK_64(0x99CB88D7, 0xD7F53884), + MK_64(0x384BDDB1, 0xAEDDB5DE) +}; + +/* blkSize = 256 bits. hashSize = 256 bits */ +const uint64_t SKEIN_256_IV_256[] = { + MK_64(0xFC9DA860, 0xD048B449), + MK_64(0x2FCA6647, 0x9FA7D833), + MK_64(0xB33BC389, 0x6656840F), + MK_64(0x6A54E920, 0xFDE8DA69) +}; + +/* blkSize = 512 bits. 
hashSize = 128 bits */ +const uint64_t SKEIN_512_IV_128[] = { + MK_64(0xA8BC7BF3, 0x6FBF9F52), + MK_64(0x1E9872CE, 0xBD1AF0AA), + MK_64(0x309B1790, 0xB32190D3), + MK_64(0xBCFBB854, 0x3F94805C), + MK_64(0x0DA61BCD, 0x6E31B11B), + MK_64(0x1A18EBEA, 0xD46A32E3), + MK_64(0xA2CC5B18, 0xCE84AA82), + MK_64(0x6982AB28, 0x9D46982D) +}; + +/* blkSize = 512 bits. hashSize = 160 bits */ +const uint64_t SKEIN_512_IV_160[] = { + MK_64(0x28B81A2A, 0xE013BD91), + MK_64(0xC2F11668, 0xB5BDF78F), + MK_64(0x1760D8F3, 0xF6A56F12), + MK_64(0x4FB74758, 0x8239904F), + MK_64(0x21EDE07F, 0x7EAF5056), + MK_64(0xD908922E, 0x63ED70B8), + MK_64(0xB8EC76FF, 0xECCB52FA), + MK_64(0x01A47BB8, 0xA3F27A6E) +}; + +/* blkSize = 512 bits. hashSize = 224 bits */ +const uint64_t SKEIN_512_IV_224[] = { + MK_64(0xCCD06162, 0x48677224), + MK_64(0xCBA65CF3, 0xA92339EF), + MK_64(0x8CCD69D6, 0x52FF4B64), + MK_64(0x398AED7B, 0x3AB890B4), + MK_64(0x0F59D1B1, 0x457D2BD0), + MK_64(0x6776FE65, 0x75D4EB3D), + MK_64(0x99FBC70E, 0x997413E9), + MK_64(0x9E2CFCCF, 0xE1C41EF7) +}; + +/* blkSize = 512 bits. hashSize = 256 bits */ +const uint64_t SKEIN_512_IV_256[] = { + MK_64(0xCCD044A1, 0x2FDB3E13), + MK_64(0xE8359030, 0x1A79A9EB), + MK_64(0x55AEA061, 0x4F816E6F), + MK_64(0x2A2767A4, 0xAE9B94DB), + MK_64(0xEC06025E, 0x74DD7683), + MK_64(0xE7A436CD, 0xC4746251), + MK_64(0xC36FBAF9, 0x393AD185), + MK_64(0x3EEDBA18, 0x33EDFC13) +}; + +/* blkSize = 512 bits. hashSize = 384 bits */ +const uint64_t SKEIN_512_IV_384[] = { + MK_64(0xA3F6C6BF, 0x3A75EF5F), + MK_64(0xB0FEF9CC, 0xFD84FAA4), + MK_64(0x9D77DD66, 0x3D770CFE), + MK_64(0xD798CBF3, 0xB468FDDA), + MK_64(0x1BC4A666, 0x8A0E4465), + MK_64(0x7ED7D434, 0xE5807407), + MK_64(0x548FC1AC, 0xD4EC44D6), + MK_64(0x266E1754, 0x6AA18FF8) +}; + +/* blkSize = 512 bits. 
hashSize = 512 bits */ +const uint64_t SKEIN_512_IV_512[] = { + MK_64(0x4903ADFF, 0x749C51CE), + MK_64(0x0D95DE39, 0x9746DF03), + MK_64(0x8FD19341, 0x27C79BCE), + MK_64(0x9A255629, 0xFF352CB1), + MK_64(0x5DB62599, 0xDF6CA7B0), + MK_64(0xEABE394C, 0xA9D5C3F4), + MK_64(0x991112C7, 0x1A75B523), + MK_64(0xAE18A40B, 0x660FCC33) +}; + +/* blkSize = 1024 bits. hashSize = 384 bits */ +const uint64_t SKEIN1024_IV_384[] = { + MK_64(0x5102B6B8, 0xC1894A35), + MK_64(0xFEEBC9E3, 0xFE8AF11A), + MK_64(0x0C807F06, 0xE32BED71), + MK_64(0x60C13A52, 0xB41A91F6), + MK_64(0x9716D35D, 0xD4917C38), + MK_64(0xE780DF12, 0x6FD31D3A), + MK_64(0x797846B6, 0xC898303A), + MK_64(0xB172C2A8, 0xB3572A3B), + MK_64(0xC9BC8203, 0xA6104A6C), + MK_64(0x65909338, 0xD75624F4), + MK_64(0x94BCC568, 0x4B3F81A0), + MK_64(0x3EBBF51E, 0x10ECFD46), + MK_64(0x2DF50F0B, 0xEEB08542), + MK_64(0x3B5A6530, 0x0DBC6516), + MK_64(0x484B9CD2, 0x167BBCE1), + MK_64(0x2D136947, 0xD4CBAFEA) +}; + +/* blkSize = 1024 bits. hashSize = 512 bits */ +const uint64_t SKEIN1024_IV_512[] = { + MK_64(0xCAEC0E5D, 0x7C1B1B18), + MK_64(0xA01B0E04, 0x5F03E802), + MK_64(0x33840451, 0xED912885), + MK_64(0x374AFB04, 0xEAEC2E1C), + MK_64(0xDF25A0E2, 0x813581F7), + MK_64(0xE4004093, 0x8B12F9D2), + MK_64(0xA662D539, 0xC2ED39B6), + MK_64(0xFA8B85CF, 0x45D8C75A), + MK_64(0x8316ED8E, 0x29EDE796), + MK_64(0x053289C0, 0x2E9F91B8), + MK_64(0xC3F8EF1D, 0x6D518B73), + MK_64(0xBDCEC3C4, 0xD5EF332E), + MK_64(0x549A7E52, 0x22974487), + MK_64(0x67070872, 0x5B749816), + MK_64(0xB9CD28FB, 0xF0581BD1), + MK_64(0x0E2940B8, 0x15804974) +}; + +/* blkSize = 1024 bits. 
hashSize = 1024 bits */ +const uint64_t SKEIN1024_IV_1024[] = { + MK_64(0xD593DA07, 0x41E72355), + MK_64(0x15B5E511, 0xAC73E00C), + MK_64(0x5180E5AE, 0xBAF2C4F0), + MK_64(0x03BD41D3, 0xFCBCAFAF), + MK_64(0x1CAEC6FD, 0x1983A898), + MK_64(0x6E510B8B, 0xCDD0589F), + MK_64(0x77E2BDFD, 0xC6394ADA), + MK_64(0xC11E1DB5, 0x24DCB0A3), + MK_64(0xD6D14AF9, 0xC6329AB5), + MK_64(0x6A9B0BFC, 0x6EB67E0D), + MK_64(0x9243C60D, 0xCCFF1332), + MK_64(0x1A1F1DDE, 0x743F02D4), + MK_64(0x0996753C, 0x10ED0BB8), + MK_64(0x6572DD22, 0xF2B4969A), + MK_64(0x61FD3062, 0xD00A579A), + MK_64(0x1DE0536E, 0x8682E539) +}; diff --git a/module/icp/algs/skein/skein_port.h b/module/icp/algs/skein/skein_port.h new file mode 100644 index 000000000..1b0225236 --- /dev/null +++ b/module/icp/algs/skein/skein_port.h @@ -0,0 +1,128 @@ +/* + * Platform-specific definitions for Skein hash function. + * + * Source code author: Doug Whiting, 2008. + * + * This algorithm and source code is released to the public domain. + * + * Many thanks to Brian Gladman for his portable header files. + * + * To port Skein to an "unsupported" platform, change the definitions + * in this file appropriately. + */ +/* Copyright 2013 Doug Whiting. This code is released to the public domain. */ + +#ifndef _SKEIN_PORT_H_ +#define _SKEIN_PORT_H_ + +#include /* get integer type definitions */ +#include /* for bcopy() */ + +#ifndef RotL_64 +#define RotL_64(x, N) (((x) << (N)) | ((x) >> (64 - (N)))) +#endif + +/* + * Skein is "natively" little-endian (unlike SHA-xxx), for optimal + * performance on x86 CPUs. The Skein code requires the following + * definitions for dealing with endianness: + * + * SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian + * Skein_Put64_LSB_First + * Skein_Get64_LSB_First + * Skein_Swap64 + * + * If SKEIN_NEED_SWAP is defined at compile time, it is used here + * along with the portable versions of Put64/Get64/Swap64, which + * are slow in general. 
+ * + * Otherwise, an "auto-detect" of endianness is attempted below. + * If the default handling doesn't work well, the user may insert + * platform-specific code instead (e.g., for big-endian CPUs). + * + */ +#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */ + +#include /* get endianness selection */ + +#define PLATFORM_MUST_ALIGN _ALIGNMENT_REQUIRED +#if defined(_BIG_ENDIAN) +/* here for big-endian CPUs */ +#define SKEIN_NEED_SWAP (1) +#else +/* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */ +#define SKEIN_NEED_SWAP (0) +#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? */ +#define Skein_Put64_LSB_First(dst08, src64, bCnt) bcopy(src64, dst08, bCnt) +#define Skein_Get64_LSB_First(dst64, src08, wCnt) \ + bcopy(src08, dst64, 8 * (wCnt)) +#endif +#endif + +#endif /* ifndef SKEIN_NEED_SWAP */ + +/* + * Provide any definitions still needed. + */ +#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */ +#if SKEIN_NEED_SWAP +#define Skein_Swap64(w64) \ + (((((uint64_t)(w64)) & 0xFF) << 56) | \ + (((((uint64_t)(w64)) >> 8) & 0xFF) << 48) | \ + (((((uint64_t)(w64)) >> 16) & 0xFF) << 40) | \ + (((((uint64_t)(w64)) >> 24) & 0xFF) << 32) | \ + (((((uint64_t)(w64)) >> 32) & 0xFF) << 24) | \ + (((((uint64_t)(w64)) >> 40) & 0xFF) << 16) | \ + (((((uint64_t)(w64)) >> 48) & 0xFF) << 8) | \ + (((((uint64_t)(w64)) >> 56) & 0xFF))) +#else +#define Skein_Swap64(w64) (w64) +#endif +#endif /* ifndef Skein_Swap64 */ + +#ifndef Skein_Put64_LSB_First +void +Skein_Put64_LSB_First(uint8_t *dst, const uint64_t *src, size_t bCnt) +#ifdef SKEIN_PORT_CODE /* instantiate the function code here? 
*/ +{ + /* + * this version is fully portable (big-endian or little-endian), + * but slow + */ + size_t n; + + for (n = 0; n < bCnt; n++) + dst[n] = (uint8_t)(src[n >> 3] >> (8 * (n & 7))); +} +#else +; /* output only the function prototype */ +#endif +#endif /* ifndef Skein_Put64_LSB_First */ + +#ifndef Skein_Get64_LSB_First +void +Skein_Get64_LSB_First(uint64_t *dst, const uint8_t *src, size_t wCnt) +#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */ +{ + /* + * this version is fully portable (big-endian or little-endian), + * but slow + */ + size_t n; + + for (n = 0; n < 8 * wCnt; n += 8) + dst[n / 8] = (((uint64_t)src[n])) + + (((uint64_t)src[n + 1]) << 8) + + (((uint64_t)src[n + 2]) << 16) + + (((uint64_t)src[n + 3]) << 24) + + (((uint64_t)src[n + 4]) << 32) + + (((uint64_t)src[n + 5]) << 40) + + (((uint64_t)src[n + 6]) << 48) + + (((uint64_t)src[n + 7]) << 56); +} +#else +; /* output only the function prototype */ +#endif +#endif /* ifndef Skein_Get64_LSB_First */ + +#endif /* _SKEIN_PORT_H_ */ diff --git a/module/icp/asm-x86_64/sha2/sha256_impl.S b/module/icp/asm-x86_64/sha2/sha256_impl.S index b689c9022..d55c5eb48 100644 --- a/module/icp/asm-x86_64/sha2/sha256_impl.S +++ b/module/icp/asm-x86_64/sha2/sha256_impl.S @@ -62,11 +62,9 @@ */ /* - * This file was generated by a perl script (sha512-x86_64.pl) that could - * be used to generate sha256 and sha512 variants from the same code base. - * For our purposes, we only need sha256 and so getting the perl script to - * run as part of the build process seemed superfluous. The comments from - * the original file have been pasted above. + * This file was generated by a perl script (sha512-x86_64.pl) that was + * used to generate sha256 and sha512 variants from the same code base. + * The comments from the original file have been pasted above.
*/ #if defined(lint) || defined(__lint) diff --git a/module/icp/asm-x86_64/sha2/sha512_impl.S b/module/icp/asm-x86_64/sha2/sha512_impl.S new file mode 100644 index 000000000..24a41745b --- /dev/null +++ b/module/icp/asm-x86_64/sha2/sha512_impl.S @@ -0,0 +1,2083 @@ +/* + * ==================================================================== + * Written by Andy Polyakov for the OpenSSL + * project. Rights for redistribution and usage in source and binary + * forms are granted according to the OpenSSL license. + * ==================================================================== + * + * sha256/512_block procedure for x86_64. + * + * 40% improvement over compiler-generated code on Opteron. On EM64T + * sha256 was observed to run >80% faster and sha512 - >40%. No magical + * tricks, just straight implementation... I really wonder why gcc + * [being armed with inline assembler] fails to generate as fast code. + * The only thing which is cool about this module is that it's very + * same instruction sequence used for both SHA-256 and SHA-512. In + * former case the instructions operate on 32-bit operands, while in + * latter - on 64-bit ones. All I had to do is to get one flavor right, + * the other one passed the test right away:-) + * + * sha256_block runs in ~1005 cycles on Opteron, which gives you + * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock + * frequency in GHz. sha512_block runs in ~1275 cycles, which results + * in 128*1000/1275=100MBps per GHz. Is there room for improvement? + * Well, if you compare it to IA-64 implementation, which maintains + * X[16] in register bank[!], tends to 4 instructions per CPU clock + * cycle and runs in 1003 cycles, 1275 is very good result for 3-way + * issue Opteron pipeline and X[16] maintained in memory. 
So that *if* + * there is a way to improve it, *then* the only way would be to try to + * offload X[16] updates to SSE unit, but that would require "deeper" + * loop unroll, which in turn would naturally cause size blow-up, not + * to mention increased complexity! And once again, only *if* it's + * actually possible to noticeably improve overall ILP, instruction + * level parallelism, on a given CPU implementation in this case. + * + * Special note on Intel EM64T. While Opteron CPU exhibits perfect + * performance ratio of 1.5 between 64- and 32-bit flavors [see above], + * [currently available] EM64T CPUs apparently are far from it. On the + * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit + * sha256_block:-( This is presumably because 64-bit shifts/rotates + * apparently are not atomic instructions, but implemented in microcode. + */ + +/* + * OpenSolaris OS modifications + * + * Sun elects to use this software under the BSD license. + * + * This source originates from OpenSSL file sha512-x86_64.pl at + * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz + * (presumably for future OpenSSL release 0.9.8h), with these changes: + * + * 1. Added perl "use strict" and declared variables. + * + * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from + * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. + * + * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) + * assemblers). Replaced the .picmeup macro with assembler code. + * + * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", + * at the beginning of SHA2_CTX (the next field is 8-byte aligned). + */ + +/* + * This file was generated by a perl script (sha512-x86_64.pl) that was + * used to generate sha256 and sha512 variants from the same code base. + * The comments from the original file have been pasted above.
+ */ + + +#if defined(lint) || defined(__lint) +#include +#include + +/* ARGSUSED */ +void +SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) +{ +} + + +#else +#define _ASM +#include + +ENTRY_NP(SHA512TransformBlocks) + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + mov %rsp,%rbp # copy %rsp + shl $4,%rdx # num*16 + sub $16*8+4*8,%rsp + lea (%rsi,%rdx,8),%rdx # inp+num*16*8 + and $-64,%rsp # align stack frame + add $8,%rdi # Skip OpenSolaris field, "algotype" + mov %rdi,16*8+0*8(%rsp) # save ctx, 1st arg + mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg + mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg + mov %rbp,16*8+3*8(%rsp) # save copy of %rsp + + /.picmeup %rbp + / The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts + / the address of the "next" instruction into the target register + / (%rbp). This generates these 2 instructions: + lea .Llea(%rip),%rbp + /nop / .picmeup generates a nop for mod 8 alignment--not needed here + +.Llea: + lea K512-.(%rbp),%rbp + + mov 8*0(%rdi),%rax + mov 8*1(%rdi),%rbx + mov 8*2(%rdi),%rcx + mov 8*3(%rdi),%rdx + mov 8*4(%rdi),%r8 + mov 8*5(%rdi),%r9 + mov 8*6(%rdi),%r10 + mov 8*7(%rdi),%r11 + jmp .Lloop + +.align 16 +.Lloop: + xor %rdi,%rdi + mov 8*0(%rsi),%r12 + bswap %r12 + mov %r8,%r13 + mov %r8,%r14 + mov %r9,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r10,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r8,%r15 # (f^g)&e + mov %r12,0(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r11,%r12 # T1+=h + + mov %rax,%r11 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rax,%r13 + mov %rax,%r14 + + ror $28,%r11 + ror $34,%r13 + mov %rax,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r11 + ror $5,%r13 + or %rcx,%r14 # a|c + + xor %r13,%r11 # h=Sigma0(a) + and %rcx,%r15 # a&c + add %r12,%rdx # d+=T1 + + and %rbx,%r14 # (a|c)&b + add %r12,%r11 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 
1(%rdi),%rdi # round++ + + add %r14,%r11 # h+=Maj(a,b,c) + mov 8*1(%rsi),%r12 + bswap %r12 + mov %rdx,%r13 + mov %rdx,%r14 + mov %r8,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r9,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rdx,%r15 # (f^g)&e + mov %r12,8(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r10,%r12 # T1+=h + + mov %r11,%r10 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r11,%r13 + mov %r11,%r14 + + ror $28,%r10 + ror $34,%r13 + mov %r11,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r10 + ror $5,%r13 + or %rbx,%r14 # a|c + + xor %r13,%r10 # h=Sigma0(a) + and %rbx,%r15 # a&c + add %r12,%rcx # d+=T1 + + and %rax,%r14 # (a|c)&b + add %r12,%r10 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r10 # h+=Maj(a,b,c) + mov 8*2(%rsi),%r12 + bswap %r12 + mov %rcx,%r13 + mov %rcx,%r14 + mov %rdx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r8,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rcx,%r15 # (f^g)&e + mov %r12,16(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r9,%r12 # T1+=h + + mov %r10,%r9 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r10,%r13 + mov %r10,%r14 + + ror $28,%r9 + ror $34,%r13 + mov %r10,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r9 + ror $5,%r13 + or %rax,%r14 # a|c + + xor %r13,%r9 # h=Sigma0(a) + and %rax,%r15 # a&c + add %r12,%rbx # d+=T1 + + and %r11,%r14 # (a|c)&b + add %r12,%r9 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r9 # h+=Maj(a,b,c) + mov 8*3(%rsi),%r12 + bswap %r12 + mov %rbx,%r13 + mov %rbx,%r14 + mov %rcx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rdx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rbx,%r15 # (f^g)&e + mov %r12,24(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r8,%r12 # T1+=h + + mov %r9,%r8 + add %r13,%r12 # 
T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r9,%r13 + mov %r9,%r14 + + ror $28,%r8 + ror $34,%r13 + mov %r9,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r8 + ror $5,%r13 + or %r11,%r14 # a|c + + xor %r13,%r8 # h=Sigma0(a) + and %r11,%r15 # a&c + add %r12,%rax # d+=T1 + + and %r10,%r14 # (a|c)&b + add %r12,%r8 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r8 # h+=Maj(a,b,c) + mov 8*4(%rsi),%r12 + bswap %r12 + mov %rax,%r13 + mov %rax,%r14 + mov %rbx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rcx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rax,%r15 # (f^g)&e + mov %r12,32(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rdx,%r12 # T1+=h + + mov %r8,%rdx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r8,%r13 + mov %r8,%r14 + + ror $28,%rdx + ror $34,%r13 + mov %r8,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rdx + ror $5,%r13 + or %r10,%r14 # a|c + + xor %r13,%rdx # h=Sigma0(a) + and %r10,%r15 # a&c + add %r12,%r11 # d+=T1 + + and %r9,%r14 # (a|c)&b + add %r12,%rdx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rdx # h+=Maj(a,b,c) + mov 8*5(%rsi),%r12 + bswap %r12 + mov %r11,%r13 + mov %r11,%r14 + mov %rax,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rbx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r11,%r15 # (f^g)&e + mov %r12,40(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rcx,%r12 # T1+=h + + mov %rdx,%rcx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rdx,%r13 + mov %rdx,%r14 + + ror $28,%rcx + ror $34,%r13 + mov %rdx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rcx + ror $5,%r13 + or %r9,%r14 # a|c + + xor %r13,%rcx # h=Sigma0(a) + and %r9,%r15 # a&c + add %r12,%r10 # d+=T1 + + and %r8,%r14 # (a|c)&b + add %r12,%rcx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 
1(%rdi),%rdi # round++ + + add %r14,%rcx # h+=Maj(a,b,c) + mov 8*6(%rsi),%r12 + bswap %r12 + mov %r10,%r13 + mov %r10,%r14 + mov %r11,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rax,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r10,%r15 # (f^g)&e + mov %r12,48(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rbx,%r12 # T1+=h + + mov %rcx,%rbx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rcx,%r13 + mov %rcx,%r14 + + ror $28,%rbx + ror $34,%r13 + mov %rcx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rbx + ror $5,%r13 + or %r8,%r14 # a|c + + xor %r13,%rbx # h=Sigma0(a) + and %r8,%r15 # a&c + add %r12,%r9 # d+=T1 + + and %rdx,%r14 # (a|c)&b + add %r12,%rbx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rbx # h+=Maj(a,b,c) + mov 8*7(%rsi),%r12 + bswap %r12 + mov %r9,%r13 + mov %r9,%r14 + mov %r10,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r11,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r9,%r15 # (f^g)&e + mov %r12,56(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rax,%r12 # T1+=h + + mov %rbx,%rax + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rbx,%r13 + mov %rbx,%r14 + + ror $28,%rax + ror $34,%r13 + mov %rbx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rax + ror $5,%r13 + or %rdx,%r14 # a|c + + xor %r13,%rax # h=Sigma0(a) + and %rdx,%r15 # a&c + add %r12,%r8 # d+=T1 + + and %rcx,%r14 # (a|c)&b + add %r12,%rax # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rax # h+=Maj(a,b,c) + mov 8*8(%rsi),%r12 + bswap %r12 + mov %r8,%r13 + mov %r8,%r14 + mov %r9,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r10,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r8,%r15 # (f^g)&e + mov %r12,64(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r11,%r12 # T1+=h + + mov %rax,%r11 + add %r13,%r12 
# T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rax,%r13 + mov %rax,%r14 + + ror $28,%r11 + ror $34,%r13 + mov %rax,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r11 + ror $5,%r13 + or %rcx,%r14 # a|c + + xor %r13,%r11 # h=Sigma0(a) + and %rcx,%r15 # a&c + add %r12,%rdx # d+=T1 + + and %rbx,%r14 # (a|c)&b + add %r12,%r11 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r11 # h+=Maj(a,b,c) + mov 8*9(%rsi),%r12 + bswap %r12 + mov %rdx,%r13 + mov %rdx,%r14 + mov %r8,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r9,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rdx,%r15 # (f^g)&e + mov %r12,72(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r10,%r12 # T1+=h + + mov %r11,%r10 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r11,%r13 + mov %r11,%r14 + + ror $28,%r10 + ror $34,%r13 + mov %r11,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r10 + ror $5,%r13 + or %rbx,%r14 # a|c + + xor %r13,%r10 # h=Sigma0(a) + and %rbx,%r15 # a&c + add %r12,%rcx # d+=T1 + + and %rax,%r14 # (a|c)&b + add %r12,%r10 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r10 # h+=Maj(a,b,c) + mov 8*10(%rsi),%r12 + bswap %r12 + mov %rcx,%r13 + mov %rcx,%r14 + mov %rdx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r8,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rcx,%r15 # (f^g)&e + mov %r12,80(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r9,%r12 # T1+=h + + mov %r10,%r9 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r10,%r13 + mov %r10,%r14 + + ror $28,%r9 + ror $34,%r13 + mov %r10,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r9 + ror $5,%r13 + or %rax,%r14 # a|c + + xor %r13,%r9 # h=Sigma0(a) + and %rax,%r15 # a&c + add %r12,%rbx # d+=T1 + + and %r11,%r14 # (a|c)&b + add %r12,%r9 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) 
+ lea 1(%rdi),%rdi # round++ + + add %r14,%r9 # h+=Maj(a,b,c) + mov 8*11(%rsi),%r12 + bswap %r12 + mov %rbx,%r13 + mov %rbx,%r14 + mov %rcx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rdx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rbx,%r15 # (f^g)&e + mov %r12,88(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r8,%r12 # T1+=h + + mov %r9,%r8 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r9,%r13 + mov %r9,%r14 + + ror $28,%r8 + ror $34,%r13 + mov %r9,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r8 + ror $5,%r13 + or %r11,%r14 # a|c + + xor %r13,%r8 # h=Sigma0(a) + and %r11,%r15 # a&c + add %r12,%rax # d+=T1 + + and %r10,%r14 # (a|c)&b + add %r12,%r8 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r8 # h+=Maj(a,b,c) + mov 8*12(%rsi),%r12 + bswap %r12 + mov %rax,%r13 + mov %rax,%r14 + mov %rbx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rcx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rax,%r15 # (f^g)&e + mov %r12,96(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rdx,%r12 # T1+=h + + mov %r8,%rdx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r8,%r13 + mov %r8,%r14 + + ror $28,%rdx + ror $34,%r13 + mov %r8,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rdx + ror $5,%r13 + or %r10,%r14 # a|c + + xor %r13,%rdx # h=Sigma0(a) + and %r10,%r15 # a&c + add %r12,%r11 # d+=T1 + + and %r9,%r14 # (a|c)&b + add %r12,%rdx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rdx # h+=Maj(a,b,c) + mov 8*13(%rsi),%r12 + bswap %r12 + mov %r11,%r13 + mov %r11,%r14 + mov %rax,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rbx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r11,%r15 # (f^g)&e + mov %r12,104(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rcx,%r12 # T1+=h + + mov %rdx,%rcx + add 
%r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rdx,%r13 + mov %rdx,%r14 + + ror $28,%rcx + ror $34,%r13 + mov %rdx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rcx + ror $5,%r13 + or %r9,%r14 # a|c + + xor %r13,%rcx # h=Sigma0(a) + and %r9,%r15 # a&c + add %r12,%r10 # d+=T1 + + and %r8,%r14 # (a|c)&b + add %r12,%rcx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rcx # h+=Maj(a,b,c) + mov 8*14(%rsi),%r12 + bswap %r12 + mov %r10,%r13 + mov %r10,%r14 + mov %r11,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rax,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r10,%r15 # (f^g)&e + mov %r12,112(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rbx,%r12 # T1+=h + + mov %rcx,%rbx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rcx,%r13 + mov %rcx,%r14 + + ror $28,%rbx + ror $34,%r13 + mov %rcx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rbx + ror $5,%r13 + or %r8,%r14 # a|c + + xor %r13,%rbx # h=Sigma0(a) + and %r8,%r15 # a&c + add %r12,%r9 # d+=T1 + + and %rdx,%r14 # (a|c)&b + add %r12,%rbx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rbx # h+=Maj(a,b,c) + mov 8*15(%rsi),%r12 + bswap %r12 + mov %r9,%r13 + mov %r9,%r14 + mov %r10,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r11,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r9,%r15 # (f^g)&e + mov %r12,120(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rax,%r12 # T1+=h + + mov %rbx,%rax + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rbx,%r13 + mov %rbx,%r14 + + ror $28,%rax + ror $34,%r13 + mov %rbx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rax + ror $5,%r13 + or %rdx,%r14 # a|c + + xor %r13,%rax # h=Sigma0(a) + and %rdx,%r15 # a&c + add %r12,%r8 # d+=T1 + + and %rcx,%r14 # (a|c)&b + add %r12,%rax # h+=T1 + + or %r15,%r14 # 
Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rax # h+=Maj(a,b,c) + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + mov 8(%rsp),%r13 + mov 112(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 72(%rsp),%r12 + + add 0(%rsp),%r12 + mov %r8,%r13 + mov %r8,%r14 + mov %r9,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r10,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r8,%r15 # (f^g)&e + mov %r12,0(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r11,%r12 # T1+=h + + mov %rax,%r11 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rax,%r13 + mov %rax,%r14 + + ror $28,%r11 + ror $34,%r13 + mov %rax,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r11 + ror $5,%r13 + or %rcx,%r14 # a|c + + xor %r13,%r11 # h=Sigma0(a) + and %rcx,%r15 # a&c + add %r12,%rdx # d+=T1 + + and %rbx,%r14 # (a|c)&b + add %r12,%r11 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r11 # h+=Maj(a,b,c) + mov 16(%rsp),%r13 + mov 120(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 80(%rsp),%r12 + + add 8(%rsp),%r12 + mov %rdx,%r13 + mov %rdx,%r14 + mov %r8,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r9,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rdx,%r15 # (f^g)&e + mov %r12,8(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r10,%r12 # T1+=h + + mov %r11,%r10 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r11,%r13 + mov %r11,%r14 + + ror $28,%r10 + 
ror $34,%r13 + mov %r11,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r10 + ror $5,%r13 + or %rbx,%r14 # a|c + + xor %r13,%r10 # h=Sigma0(a) + and %rbx,%r15 # a&c + add %r12,%rcx # d+=T1 + + and %rax,%r14 # (a|c)&b + add %r12,%r10 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r10 # h+=Maj(a,b,c) + mov 24(%rsp),%r13 + mov 0(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 88(%rsp),%r12 + + add 16(%rsp),%r12 + mov %rcx,%r13 + mov %rcx,%r14 + mov %rdx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r8,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rcx,%r15 # (f^g)&e + mov %r12,16(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r9,%r12 # T1+=h + + mov %r10,%r9 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r10,%r13 + mov %r10,%r14 + + ror $28,%r9 + ror $34,%r13 + mov %r10,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r9 + ror $5,%r13 + or %rax,%r14 # a|c + + xor %r13,%r9 # h=Sigma0(a) + and %rax,%r15 # a&c + add %r12,%rbx # d+=T1 + + and %r11,%r14 # (a|c)&b + add %r12,%r9 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r9 # h+=Maj(a,b,c) + mov 32(%rsp),%r13 + mov 8(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 96(%rsp),%r12 + + add 24(%rsp),%r12 + mov %rbx,%r13 + mov %rbx,%r14 + mov %rcx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rdx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rbx,%r15 # (f^g)&e + mov %r12,24(%rsp) + + xor 
%r14,%r13 # Sigma1(e) + xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r8,%r12 # T1+=h + + mov %r9,%r8 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r9,%r13 + mov %r9,%r14 + + ror $28,%r8 + ror $34,%r13 + mov %r9,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r8 + ror $5,%r13 + or %r11,%r14 # a|c + + xor %r13,%r8 # h=Sigma0(a) + and %r11,%r15 # a&c + add %r12,%rax # d+=T1 + + and %r10,%r14 # (a|c)&b + add %r12,%r8 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r8 # h+=Maj(a,b,c) + mov 40(%rsp),%r13 + mov 16(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 104(%rsp),%r12 + + add 32(%rsp),%r12 + mov %rax,%r13 + mov %rax,%r14 + mov %rbx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rcx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rax,%r15 # (f^g)&e + mov %r12,32(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rdx,%r12 # T1+=h + + mov %r8,%rdx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r8,%r13 + mov %r8,%r14 + + ror $28,%rdx + ror $34,%r13 + mov %r8,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rdx + ror $5,%r13 + or %r10,%r14 # a|c + + xor %r13,%rdx # h=Sigma0(a) + and %r10,%r15 # a&c + add %r12,%r11 # d+=T1 + + and %r9,%r14 # (a|c)&b + add %r12,%rdx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rdx # h+=Maj(a,b,c) + mov 48(%rsp),%r13 + mov 24(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 
112(%rsp),%r12 + + add 40(%rsp),%r12 + mov %r11,%r13 + mov %r11,%r14 + mov %rax,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rbx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r11,%r15 # (f^g)&e + mov %r12,40(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rcx,%r12 # T1+=h + + mov %rdx,%rcx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rdx,%r13 + mov %rdx,%r14 + + ror $28,%rcx + ror $34,%r13 + mov %rdx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rcx + ror $5,%r13 + or %r9,%r14 # a|c + + xor %r13,%rcx # h=Sigma0(a) + and %r9,%r15 # a&c + add %r12,%r10 # d+=T1 + + and %r8,%r14 # (a|c)&b + add %r12,%rcx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rcx # h+=Maj(a,b,c) + mov 56(%rsp),%r13 + mov 32(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 120(%rsp),%r12 + + add 48(%rsp),%r12 + mov %r10,%r13 + mov %r10,%r14 + mov %r11,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rax,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r10,%r15 # (f^g)&e + mov %r12,48(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rbx,%r12 # T1+=h + + mov %rcx,%rbx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rcx,%r13 + mov %rcx,%r14 + + ror $28,%rbx + ror $34,%r13 + mov %rcx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rbx + ror $5,%r13 + or %r8,%r14 # a|c + + xor %r13,%rbx # h=Sigma0(a) + and %r8,%r15 # a&c + add %r12,%r9 # d+=T1 + + and %rdx,%r14 # (a|c)&b + add %r12,%rbx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rbx # h+=Maj(a,b,c) + mov 64(%rsp),%r13 + mov 40(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror 
$1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 0(%rsp),%r12 + + add 56(%rsp),%r12 + mov %r9,%r13 + mov %r9,%r14 + mov %r10,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r11,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r9,%r15 # (f^g)&e + mov %r12,56(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rax,%r12 # T1+=h + + mov %rbx,%rax + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rbx,%r13 + mov %rbx,%r14 + + ror $28,%rax + ror $34,%r13 + mov %rbx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rax + ror $5,%r13 + or %rdx,%r14 # a|c + + xor %r13,%rax # h=Sigma0(a) + and %rdx,%r15 # a&c + add %r12,%r8 # d+=T1 + + and %rcx,%r14 # (a|c)&b + add %r12,%rax # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rax # h+=Maj(a,b,c) + mov 72(%rsp),%r13 + mov 48(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 8(%rsp),%r12 + + add 64(%rsp),%r12 + mov %r8,%r13 + mov %r8,%r14 + mov %r9,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r10,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r8,%r15 # (f^g)&e + mov %r12,64(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r11,%r12 # T1+=h + + mov %rax,%r11 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rax,%r13 + mov %rax,%r14 + + ror $28,%r11 + ror $34,%r13 + mov %rax,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r11 + ror $5,%r13 + or %rcx,%r14 # a|c + + xor %r13,%r11 # h=Sigma0(a) + and %rcx,%r15 # a&c + add %r12,%rdx # d+=T1 + + and %rbx,%r14 
# (a|c)&b + add %r12,%r11 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r11 # h+=Maj(a,b,c) + mov 80(%rsp),%r13 + mov 56(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 16(%rsp),%r12 + + add 72(%rsp),%r12 + mov %rdx,%r13 + mov %rdx,%r14 + mov %r8,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r9,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rdx,%r15 # (f^g)&e + mov %r12,72(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r10,%r12 # T1+=h + + mov %r11,%r10 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r11,%r13 + mov %r11,%r14 + + ror $28,%r10 + ror $34,%r13 + mov %r11,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r10 + ror $5,%r13 + or %rbx,%r14 # a|c + + xor %r13,%r10 # h=Sigma0(a) + and %rbx,%r15 # a&c + add %r12,%rcx # d+=T1 + + and %rax,%r14 # (a|c)&b + add %r12,%r10 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r10 # h+=Maj(a,b,c) + mov 88(%rsp),%r13 + mov 64(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 24(%rsp),%r12 + + add 80(%rsp),%r12 + mov %rcx,%r13 + mov %rcx,%r14 + mov %rdx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r8,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rcx,%r15 # (f^g)&e + mov %r12,80(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r9,%r12 # T1+=h + + mov %r10,%r9 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r10,%r13 + mov %r10,%r14 + + ror 
$28,%r9 + ror $34,%r13 + mov %r10,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r9 + ror $5,%r13 + or %rax,%r14 # a|c + + xor %r13,%r9 # h=Sigma0(a) + and %rax,%r15 # a&c + add %r12,%rbx # d+=T1 + + and %r11,%r14 # (a|c)&b + add %r12,%r9 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r9 # h+=Maj(a,b,c) + mov 96(%rsp),%r13 + mov 72(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 32(%rsp),%r12 + + add 88(%rsp),%r12 + mov %rbx,%r13 + mov %rbx,%r14 + mov %rcx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rdx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rbx,%r15 # (f^g)&e + mov %r12,88(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r8,%r12 # T1+=h + + mov %r9,%r8 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r9,%r13 + mov %r9,%r14 + + ror $28,%r8 + ror $34,%r13 + mov %r9,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r8 + ror $5,%r13 + or %r11,%r14 # a|c + + xor %r13,%r8 # h=Sigma0(a) + and %r11,%r15 # a&c + add %r12,%rax # d+=T1 + + and %r10,%r14 # (a|c)&b + add %r12,%r8 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r8 # h+=Maj(a,b,c) + mov 104(%rsp),%r13 + mov 80(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 40(%rsp),%r12 + + add 96(%rsp),%r12 + mov %rax,%r13 + mov %rax,%r14 + mov %rbx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rcx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rax,%r15 # (f^g)&e + mov %r12,96(%rsp) 
+ + xor %r14,%r13 # Sigma1(e) + xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rdx,%r12 # T1+=h + + mov %r8,%rdx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r8,%r13 + mov %r8,%r14 + + ror $28,%rdx + ror $34,%r13 + mov %r8,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rdx + ror $5,%r13 + or %r10,%r14 # a|c + + xor %r13,%rdx # h=Sigma0(a) + and %r10,%r15 # a&c + add %r12,%r11 # d+=T1 + + and %r9,%r14 # (a|c)&b + add %r12,%rdx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rdx # h+=Maj(a,b,c) + mov 112(%rsp),%r13 + mov 88(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 48(%rsp),%r12 + + add 104(%rsp),%r12 + mov %r11,%r13 + mov %r11,%r14 + mov %rax,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rbx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r11,%r15 # (f^g)&e + mov %r12,104(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rcx,%r12 # T1+=h + + mov %rdx,%rcx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rdx,%r13 + mov %rdx,%r14 + + ror $28,%rcx + ror $34,%r13 + mov %rdx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rcx + ror $5,%r13 + or %r9,%r14 # a|c + + xor %r13,%rcx # h=Sigma0(a) + and %r9,%r15 # a&c + add %r12,%r10 # d+=T1 + + and %r8,%r14 # (a|c)&b + add %r12,%rcx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rcx # h+=Maj(a,b,c) + mov 120(%rsp),%r13 + mov 96(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add 
%r13,%r12 + + add 56(%rsp),%r12 + + add 112(%rsp),%r12 + mov %r10,%r13 + mov %r10,%r14 + mov %r11,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rax,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r10,%r15 # (f^g)&e + mov %r12,112(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rbx,%r12 # T1+=h + + mov %rcx,%rbx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rcx,%r13 + mov %rcx,%r14 + + ror $28,%rbx + ror $34,%r13 + mov %rcx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rbx + ror $5,%r13 + or %r8,%r14 # a|c + + xor %r13,%rbx # h=Sigma0(a) + and %r8,%r15 # a&c + add %r12,%r9 # d+=T1 + + and %rdx,%r14 # (a|c)&b + add %r12,%rbx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rbx # h+=Maj(a,b,c) + mov 0(%rsp),%r13 + mov 104(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 64(%rsp),%r12 + + add 120(%rsp),%r12 + mov %r9,%r13 + mov %r9,%r14 + mov %r10,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r11,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r9,%r15 # (f^g)&e + mov %r12,120(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rax,%r12 # T1+=h + + mov %rbx,%rax + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rbx,%r13 + mov %rbx,%r14 + + ror $28,%rax + ror $34,%r13 + mov %rbx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rax + ror $5,%r13 + or %rdx,%r14 # a|c + + xor %r13,%rax # h=Sigma0(a) + and %rdx,%r15 # a&c + add %r12,%r8 # d+=T1 + + and %rcx,%r14 # (a|c)&b + add %r12,%rax # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rax # h+=Maj(a,b,c) + cmp $80,%rdi + jb .Lrounds_16_xx + + mov 
16*8+0*8(%rsp),%rdi + lea 16*8(%rsi),%rsi + + add 8*0(%rdi),%rax + add 8*1(%rdi),%rbx + add 8*2(%rdi),%rcx + add 8*3(%rdi),%rdx + add 8*4(%rdi),%r8 + add 8*5(%rdi),%r9 + add 8*6(%rdi),%r10 + add 8*7(%rdi),%r11 + + cmp 16*8+2*8(%rsp),%rsi + + mov %rax,8*0(%rdi) + mov %rbx,8*1(%rdi) + mov %rcx,8*2(%rdi) + mov %rdx,8*3(%rdi) + mov %r8,8*4(%rdi) + mov %r9,8*5(%rdi) + mov %r10,8*6(%rdi) + mov %r11,8*7(%rdi) + jb .Lloop + + mov 16*8+3*8(%rsp),%rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + + ret +SET_SIZE(SHA512TransformBlocks) + +.align 64 +.type K512,@object +K512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 
0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +#endif /* !lint && !__lint */ diff --git a/module/icp/illumos-crypto.c b/module/icp/illumos-crypto.c index 7dd5dbf42..aa63e431f 100644 --- a/module/icp/illumos-crypto.c +++ b/module/icp/illumos-crypto.c @@ -109,8 +109,10 @@ void __exit icp_fini(void) { + skein_mod_fini(); sha2_mod_fini(); sha1_mod_fini(); + edonr_mod_fini(); aes_mod_fini(); kcf_sched_destroy(); kcf_prov_tab_destroy(); @@ -139,8 +141,10 @@ icp_init(void) /* initialize algorithms */ aes_mod_init(); + edonr_mod_init(); sha1_mod_init(); sha2_mod_init(); + skein_mod_init(); return (0); } diff --git a/module/icp/include/sha2/sha2_impl.h b/module/icp/include/sha2/sha2_impl.h index bb42c3cd4..b9768d344 100644 --- a/module/icp/include/sha2/sha2_impl.h +++ b/module/icp/include/sha2/sha2_impl.h @@ -26,6 +26,8 @@ #ifndef _SHA2_IMPL_H #define _SHA2_IMPL_H +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/module/icp/io/edonr_mod.c b/module/icp/io/edonr_mod.c new file mode 100644 index 000000000..19b5c963d --- /dev/null +++ b/module/icp/io/edonr_mod.c @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Unlike sha2 or skein, we won't expose edonr via the Kernel Cryptographic
+ * Framework (KCF), because Edon-R is *NOT* suitable for general-purpose
+ * cryptographic use. Users of Edon-R must interface directly to this module.
+ */
+
+/* Module record carrying the human-readable description of this module. */
+static struct modlmisc modlmisc = {
+	&mod_cryptoops,
+	"Edon-R Message-Digest Algorithm"
+};
+
+/* NULL-terminated linkage list passed to mod_install()/mod_remove(). */
+static struct modlinkage modlinkage = {
+	MODREV_1, {&modlmisc, NULL}
+};
+
+/*
+ * Installs the Edon-R module linkage.
+ * Returns 0 on success, otherwise the non-zero value from mod_install().
+ */
+int
+edonr_mod_init(void)
+{
+	return (mod_install(&modlinkage));
+}
+
+/*
+ * Removes the Edon-R module linkage; returns whatever mod_remove() reports.
+ */
+int
+edonr_mod_fini(void)
+{
+	return (mod_remove(&modlinkage));
+}
diff --git a/module/icp/io/sha2_mod.c b/module/icp/io/sha2_mod.c
index be0f7a42c..3913d7618 100644
--- a/module/icp/io/sha2_mod.c
+++ b/module/icp/io/sha2_mod.c
@@ -30,7 +30,7 @@
 #include
 #include
 #define	_SHA2_IMPL
-#include
+#include
 #include
 
 /*
diff --git a/module/icp/io/skein_mod.c b/module/icp/io/skein_mod.c
new file mode 100644
index 000000000..e909a7e31
--- /dev/null
+++ b/module/icp/io/skein_mod.c
@@ -0,0 +1,721 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#define	SKEIN_MODULE_IMPL
+#include
+
+/*
+ * Like the sha2 module, we create the skein module with two modlinkages:
+ * - modlmisc to allow direct calls to Skein_* API functions.
+ * - modlcrypto to integrate well into the Kernel Crypto Framework (KCF).
+ */
+static struct modlmisc modlmisc = {
+	&mod_cryptoops,
+	"Skein Message-Digest Algorithm"
+};
+
+static struct modlcrypto modlcrypto = {
+	&mod_cryptoops,
+	"Skein Kernel SW Provider"
+};
+
+/* NULL-terminated list of both linkage records, used by mod_install(). */
+static struct modlinkage modlinkage = {
+	MODREV_1, {&modlmisc, &modlcrypto, NULL}
+};
+
+/*
+ * Mechanisms advertised to KCF: a plain digest and a MAC variant for each
+ * of Skein-256, Skein-512 and Skein-1024.  Digest entries carry "0, 0" and
+ * a bit unit, MAC entries carry "1, INT_MAX" and a byte unit.
+ * NOTE(review): the two numbers are presumably the minimum/maximum key
+ * size - confirm against the crypto_mech_info_t declaration.
+ */
+static crypto_mech_info_t skein_mech_info_tab[] = {
+	{CKM_SKEIN_256, SKEIN_256_MECH_INFO_TYPE,
+	    CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+	    0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+	{CKM_SKEIN_256_MAC, SKEIN_256_MAC_MECH_INFO_TYPE,
+	    CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
+	    CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+	{CKM_SKEIN_512, SKEIN_512_MECH_INFO_TYPE,
+	    CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+	    0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+	{CKM_SKEIN_512_MAC, SKEIN_512_MAC_MECH_INFO_TYPE,
+	    CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
+	    CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+	{CKM_SKEIN1024, SKEIN1024_MECH_INFO_TYPE,
+	    CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+	    0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+	{CKM_SKEIN1024_MAC, SKEIN1024_MAC_MECH_INFO_TYPE,
+	    CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
+	    CRYPTO_KEYSIZE_UNIT_IN_BYTES}
+};
+
+static void skein_provider_status(crypto_provider_handle_t, uint_t *);
+
+/* KCF control entry points; only the status query is provided. */
+static crypto_control_ops_t skein_control_ops = {
+	skein_provider_status
+};
+
+static int skein_digest_init(crypto_ctx_t *, crypto_mechanism_t *,
+    crypto_req_handle_t);
+static int skein_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
+    crypto_req_handle_t);
+static int skein_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
+static int skein_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
+static int skein_digest_atomic(crypto_provider_handle_t, crypto_session_id_t,
+    crypto_mechanism_t *, crypto_data_t *, crypto_data_t *,
+    crypto_req_handle_t);
+
+/*
+ * KCF digest entry points.  The NULL slot is an operation this provider
+ * does not implement (presumably digest_key - confirm against
+ * crypto_digest_ops_t).
+ */
+static crypto_digest_ops_t skein_digest_ops = {
+	skein_digest_init,
+	skein_digest,
+	skein_update,
+	NULL,
+	skein_final,
+	skein_digest_atomic
+};
+
+static int skein_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *,
+    crypto_spi_ctx_template_t, crypto_req_handle_t);
+static int skein_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
+    crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
+    crypto_spi_ctx_template_t, crypto_req_handle_t);
+
+/* KCF MAC entry points; NULL slots are operations that are not provided. */
+static crypto_mac_ops_t skein_mac_ops = {
+	skein_mac_init,
+	NULL,
+	skein_update,	/* using regular digest update is OK here */
+	skein_final,	/* using regular digest final is OK here */
+	skein_mac_atomic,
+	NULL
+};
+
+static int skein_create_ctx_template(crypto_provider_handle_t,
+    crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
+    size_t *, crypto_req_handle_t);
+static int skein_free_context(crypto_ctx_t *);
+
+/* KCF context management entry points. */
+static crypto_ctx_ops_t skein_ctx_ops = {
+	skein_create_ctx_template,
+	skein_free_context
+};
+
+/*
+ * Top-level operations vector: only the control, digest, MAC and context
+ * groups are populated, every other group is NULL.
+ */
+static crypto_ops_t skein_crypto_ops = {{{{{
+	&skein_control_ops,
+	&skein_digest_ops,
+	NULL,
+	&skein_mac_ops,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	&skein_ctx_ops,
+}}}}};
+
+/* Provider description handed to crypto_register_provider(). */
+static crypto_provider_info_t skein_prov_info = {{{{
+	CRYPTO_SPI_VERSION_1,
+	"Skein Software Provider",
+	CRYPTO_SW_PROVIDER,
+	NULL,
+	&skein_crypto_ops,
+	sizeof (skein_mech_info_tab) / sizeof (crypto_mech_info_t),
+	skein_mech_info_tab
+}}}};
+
+/* Handle filled in by crypto_register_provider(); 0 while unregistered. */
+static crypto_kcf_provider_handle_t skein_prov_handle = 0;
+
+/*
+ * Per-operation state.  sc_mech_type selects which member of the anonymous
+ * union is live (see SKEIN_OP); sc_digest_bitlen is the requested output
+ * length in bits.
+ */
+typedef struct skein_ctx {
+	skein_mech_type_t		sc_mech_type;
+	size_t				sc_digest_bitlen;
+	/*LINTED(E_ANONYMOUS_UNION_DECL)*/
+	union {
+		Skein_256_Ctxt_t	sc_256;
+		Skein_512_Ctxt_t	sc_512;
+		Skein1024_Ctxt_t	sc_1024;
+	};
+} skein_ctx_t;
+
+/*
+ * SKEIN_CTX reads the provider-private pointer of a crypto_ctx_t as a
+ * skein_ctx_t *; SKEIN_CTX_LVALUE is the assignable form of that pointer.
+ * SKEIN_OP dispatches Skein_256_<op>/Skein_512_<op>/Skein1024_<op> on the
+ * context's mechanism type and discards the callee's return value.  An
+ * unrecognized mechanism type makes it a silent no-op.
+ */
+#define	SKEIN_CTX(_ctx_)	((skein_ctx_t *)((_ctx_)->cc_provider_private))
+#define	SKEIN_CTX_LVALUE(_ctx_)	(_ctx_)->cc_provider_private
+#define	SKEIN_OP(_skein_ctx, _op, ...)					\
+	do {								\
+		skein_ctx_t	*sc = (_skein_ctx);			\
+		switch (sc->sc_mech_type) {				\
+		case SKEIN_256_MECH_INFO_TYPE:				\
+		case SKEIN_256_MAC_MECH_INFO_TYPE:			\
+			(void) Skein_256_ ## _op(&sc->sc_256, __VA_ARGS__);\
+			break;						\
+		case SKEIN_512_MECH_INFO_TYPE:				\
+		case SKEIN_512_MAC_MECH_INFO_TYPE:			\
+			(void) Skein_512_ ## _op(&sc->sc_512, __VA_ARGS__);\
+			break;						\
+		case SKEIN1024_MECH_INFO_TYPE:				\
+		case SKEIN1024_MAC_MECH_INFO_TYPE:			\
+			(void) Skein1024_ ## _op(&sc->sc_1024, __VA_ARGS__);\
+			break;						\
+		}							\
+		_NOTE(CONSTCOND)					\
+	} while (0)
+
+/*
+ * Determines the digest length in bits for `mechanism' and stores it in
+ * `*result'.
+ *
+ * If cm_param is set it must be a skein_param_t of exactly that size with
+ * a non-zero sp_digest_bitlen, otherwise CRYPTO_MECHANISM_PARAM_INVALID is
+ * returned.  Without a parameter the default is 256/512/1024 bits for the
+ * three plain digest mechanism types; any other cm_type (this includes the
+ * MAC types) yields CRYPTO_MECHANISM_INVALID.
+ */
+static int
+skein_get_digest_bitlen(const crypto_mechanism_t *mechanism, size_t *result)
+{
+	if (mechanism->cm_param != NULL) {
+		/*LINTED(E_BAD_PTR_CAST_ALIGN)*/
+		skein_param_t	*param = (skein_param_t *)mechanism->cm_param;
+
+		if (mechanism->cm_param_len != sizeof (*param) ||
+		    param->sp_digest_bitlen == 0) {
+			return (CRYPTO_MECHANISM_PARAM_INVALID);
+		}
+		*result = param->sp_digest_bitlen;
+	} else {
+		switch (mechanism->cm_type) {
+		case SKEIN_256_MECH_INFO_TYPE:
+			*result = 256;
+			break;
+		case SKEIN_512_MECH_INFO_TYPE:
+			*result = 512;
+			break;
+		case SKEIN1024_MECH_INFO_TYPE:
+			*result = 1024;
+			break;
+		default:
+			return (CRYPTO_MECHANISM_INVALID);
+		}
+	}
+	return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Installs the module linkage and then tries to register with KCF.
+ * Returns the mod_install() error if linkage fails, 0 otherwise.
+ */
+int
+skein_mod_init(void)
+{
+	int error;
+
+	if ((error = mod_install(&modlinkage)) != 0)
+		return (error);
+
+	/*
+	 * Try to register with KCF - failure shouldn't unload us, since we
+	 * still may want to continue providing misc/skein functionality.
+	 */
+	(void) crypto_register_provider(&skein_prov_info, &skein_prov_handle);
+
+	return (0);
+}
+
+/*
+ * Undoes skein_mod_init(): unregisters from KCF if registration succeeded
+ * (non-zero handle) and then removes the module linkage.  Returns EBUSY
+ * and leaves the module installed if KCF refuses to let the provider go.
+ */
+int
+skein_mod_fini(void)
+{
+	if (skein_prov_handle != 0) {
+		if (crypto_unregister_provider(skein_prov_handle) !=
+		    CRYPTO_SUCCESS)
+			return (EBUSY);
+		skein_prov_handle = 0;
+	}
+
+	return (mod_remove(&modlinkage));
+}
+
+/*
+ * KCF software provider control entry points.
+ */
+/* ARGSUSED */
+static void
+skein_provider_status(crypto_provider_handle_t provider, uint_t *status)
+{
+	/* A software provider is always ready. */
+	*status = CRYPTO_PROVIDER_READY;
+}
+
+/*
+ * General Skein hashing helper functions.
+ */
+
+/*
+ * Performs an Update on a context with uio input data.
+ *
+ * Hashes cd_length bytes starting cd_offset bytes into the iovec list.
+ * Returns CRYPTO_ARGUMENTS_BAD for non-kernel buffers and
+ * CRYPTO_DATA_LEN_RANGE if the offset or the length runs past the iovecs.
+ */
+static int
+skein_digest_update_uio(skein_ctx_t *ctx, const crypto_data_t *data)
+{
+	off_t		offset = data->cd_offset;
+	size_t		length = data->cd_length;
+	uint_t		vec_idx;
+	size_t		cur_len;
+	const uio_t	*uio = data->cd_uio;
+
+	/* we support only kernel buffer */
+	if (uio->uio_segflg != UIO_SYSSPACE)
+		return (CRYPTO_ARGUMENTS_BAD);
+
+	/*
+	 * Jump to the first iovec containing data to be
+	 * digested.
+	 */
+	for (vec_idx = 0; vec_idx < uio->uio_iovcnt &&
+	    offset >= uio->uio_iov[vec_idx].iov_len;
+	    offset -= uio->uio_iov[vec_idx++].iov_len)
+		;
+	if (vec_idx == uio->uio_iovcnt) {
+		/*
+		 * The caller specified an offset that is larger than the
+		 * total size of the buffers it provided.
+		 */
+		return (CRYPTO_DATA_LEN_RANGE);
+	}
+
+	/*
+	 * Now do the digesting on the iovecs.
+	 */
+	while (vec_idx < uio->uio_iovcnt && length > 0) {
+		cur_len = MIN(uio->uio_iov[vec_idx].iov_len - offset, length);
+		SKEIN_OP(ctx, Update, (uint8_t *)uio->uio_iov[vec_idx].iov_base
+		    + offset, cur_len);
+		length -= cur_len;
+		vec_idx++;
+		offset = 0;
+	}
+
+	if (vec_idx == uio->uio_iovcnt && length > 0) {
+		/*
+		 * The end of the specified iovec's was reached but
+		 * the length requested could not be processed, i.e.
+		 * The caller requested to digest more data than it provided.
+		 */
+		return (CRYPTO_DATA_LEN_RANGE);
+	}
+
+	return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Performs a Final on a context and writes to a uio digest output.
+ *
+ * The digest is written cd_offset bytes into the iovec list.  If it fits
+ * in one iovec it is produced in place, otherwise it is produced in a
+ * temporary buffer and scattered over the following iovecs.
+ * Returns CRYPTO_ARGUMENTS_BAD for non-kernel buffers,
+ * CRYPTO_HOST_MEMORY if the temporary buffer cannot be allocated and
+ * CRYPTO_DATA_LEN_RANGE if the offset or the digest runs past the iovecs.
+ */
+static int
+skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest,
+    crypto_req_handle_t req)
+{
+	off_t	offset = digest->cd_offset;
+	uint_t	vec_idx;
+	uio_t	*uio = digest->cd_uio;
+
+	/* we support only kernel buffer */
+	if (uio->uio_segflg != UIO_SYSSPACE)
+		return (CRYPTO_ARGUMENTS_BAD);
+
+	/*
+	 * Jump to the first iovec containing ptr to the digest to be returned.
+	 * The index must be bounds-checked BEFORE uio_iov[vec_idx] is read,
+	 * otherwise an offset past the last iovec reads one element beyond
+	 * the array.
+	 */
+	for (vec_idx = 0; vec_idx < uio->uio_iovcnt &&
+	    offset >= uio->uio_iov[vec_idx].iov_len;
+	    offset -= uio->uio_iov[vec_idx++].iov_len)
+		;
+	if (vec_idx == uio->uio_iovcnt) {
+		/*
+		 * The caller specified an offset that is larger than the
+		 * total size of the buffers it provided.
+		 */
+		return (CRYPTO_DATA_LEN_RANGE);
+	}
+	if (offset + CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen) <=
+	    uio->uio_iov[vec_idx].iov_len) {
+		/* The computed digest will fit in the current iovec. */
+		SKEIN_OP(ctx, Final,
+		    (uchar_t *)uio->uio_iov[vec_idx].iov_base + offset);
+	} else {
+		uint8_t	*digest_tmp;
+		off_t	scratch_offset = 0;
+		size_t	length = CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen);
+		size_t	cur_len;
+
+		digest_tmp = kmem_alloc(CRYPTO_BITS2BYTES(
+		    ctx->sc_digest_bitlen), crypto_kmflag(req));
+		if (digest_tmp == NULL)
+			return (CRYPTO_HOST_MEMORY);
+		SKEIN_OP(ctx, Final, digest_tmp);
+		while (vec_idx < uio->uio_iovcnt && length > 0) {
+			cur_len = MIN(uio->uio_iov[vec_idx].iov_len - offset,
+			    length);
+			bcopy(digest_tmp + scratch_offset,
+			    (uint8_t *)uio->uio_iov[vec_idx].iov_base + offset,
+			    cur_len);
+
+			length -= cur_len;
+			vec_idx++;
+			scratch_offset += cur_len;
+			offset = 0;
+		}
+		kmem_free(digest_tmp, CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen));
+
+		if (vec_idx == uio->uio_iovcnt && length > 0) {
+			/*
+			 * The end of the specified iovec's was reached but
+			 * the whole digest could not be stored, i.e. the
+			 * caller provided less output space than the digest
+			 * needs.
+			 */
+			return (CRYPTO_DATA_LEN_RANGE);
+		}
+	}
+
+	return (CRYPTO_SUCCESS);
+}
+
+/*
+ * KCF software provider digest entry points.
+ */
+
+/*
+ * Initializes a skein digest context to the configuration in `mechanism'.
+ * The mechanism cm_type must be one of SKEIN_*_MECH_INFO_TYPE. The cm_param
+ * field may contain a skein_param_t structure indicating the length of the
+ * digest the algorithm should produce. Otherwise the default output lengths
+ * are applied (32 bytes for Skein-256, 64 bytes for Skein-512 and 128 bytes
+ * for Skein-1024).
+ *
+ * On failure no context is left attached to `ctx'.
+ */
+static int
+skein_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+    crypto_req_handle_t req)
+{
+	int	error = CRYPTO_SUCCESS;
+
+	if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type))
+		return (CRYPTO_MECHANISM_INVALID);
+
+	SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)),
+	    crypto_kmflag(req));
+	if (SKEIN_CTX(ctx) == NULL)
+		return (CRYPTO_HOST_MEMORY);
+
+	SKEIN_CTX(ctx)->sc_mech_type = mechanism->cm_type;
+	error = skein_get_digest_bitlen(mechanism,
+	    &SKEIN_CTX(ctx)->sc_digest_bitlen);
+	if (error != CRYPTO_SUCCESS)
+		goto errout;
+	SKEIN_OP(SKEIN_CTX(ctx), Init, SKEIN_CTX(ctx)->sc_digest_bitlen);
+
+	return (CRYPTO_SUCCESS);
+errout:
+	/* Wipe, release and detach the half-built context. */
+	bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+	kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+	SKEIN_CTX_LVALUE(ctx) = NULL;
+	return (error);
+}
+
+/*
+ * Executes a skein_update and skein_final on a pre-initialized crypto
+ * context in a single step. See the documentation to these functions to
+ * see what to pass here.
+ */
+static int
+skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
+    crypto_req_handle_t req)
+{
+	size_t	digest_bytes;
+	int	error;
+
+	ASSERT(SKEIN_CTX(ctx) != NULL);
+
+	/*
+	 * Refuse early if the output cannot hold the digest; report the
+	 * size that is needed and leave the context untouched.
+	 */
+	digest_bytes = CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen);
+	if (digest->cd_length < digest_bytes) {
+		digest->cd_length = digest_bytes;
+		return (CRYPTO_BUFFER_TOO_SMALL);
+	}
+
+	error = skein_update(ctx, data, req);
+	if (error == CRYPTO_SUCCESS)
+		return (skein_final(ctx, digest, req));
+
+	/* Absorbing the input failed: wipe, release and detach the context. */
+	bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+	kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+	SKEIN_CTX_LVALUE(ctx) = NULL;
+	digest->cd_length = 0;
+	return (error);
+}
+
+/*
+ * Feeds the message in `data' into the running Skein computation; it may
+ * be called repeatedly to push more data and serves both the digest and
+ * the MAC operation.  Raw and uio input is handled; any other cd_format
+ * returns CRYPTO_ARGUMENTS_BAD.
+ */
+/*ARGSUSED*/
+static int
+skein_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req)
+{
+	ASSERT(SKEIN_CTX(ctx) != NULL);
+
+	if (data->cd_format == CRYPTO_DATA_RAW) {
+		SKEIN_OP(SKEIN_CTX(ctx), Update,
+		    (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
+		    data->cd_length);
+		return (CRYPTO_SUCCESS);
+	}
+
+	if (data->cd_format == CRYPTO_DATA_UIO)
+		return (skein_digest_update_uio(SKEIN_CTX(ctx), data));
+
+	return (CRYPTO_ARGUMENTS_BAD);
+}
+
+/*
+ * Performs a skein Final, writing the output to `digest'. This is used both
+ * for digest and MAC operation.
+ * Supported output digest formats are raw and uio.
+ */
+/*
+ * skein_final_nofree does the actual Final work but leaves the context
+ * memory alone, so it can be used on contexts that were not obtained from
+ * kmem_alloc (the atomic entry points keep theirs on the stack).
+ *
+ * Returns CRYPTO_BUFFER_TOO_SMALL (with cd_length set to the size needed)
+ * when `digest' cannot hold the output, CRYPTO_ARGUMENTS_BAD for an
+ * unsupported cd_format, or the result of the uio helper.  On success
+ * cd_length is the digest size in bytes; on any other error it is 0.
+ */
+/*ARGSUSED*/
+static int
+skein_final_nofree(crypto_ctx_t *ctx, crypto_data_t *digest,
+    crypto_req_handle_t req)
+{
+	int	error = CRYPTO_SUCCESS;
+
+	ASSERT(SKEIN_CTX(ctx) != NULL);
+
+	if (digest->cd_length <
+	    CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen)) {
+		digest->cd_length =
+		    CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen);
+		return (CRYPTO_BUFFER_TOO_SMALL);
+	}
+
+	switch (digest->cd_format) {
+	case CRYPTO_DATA_RAW:
+		SKEIN_OP(SKEIN_CTX(ctx), Final,
+		    (uint8_t *)digest->cd_raw.iov_base + digest->cd_offset);
+		break;
+	case CRYPTO_DATA_UIO:
+		error = skein_digest_final_uio(SKEIN_CTX(ctx), digest, req);
+		break;
+	default:
+		error = CRYPTO_ARGUMENTS_BAD;
+	}
+
+	if (error == CRYPTO_SUCCESS)
+		digest->cd_length =
+		    CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen);
+	else
+		digest->cd_length = 0;
+
+	return (error);
+}
+
+/*
+ * KCF final entry point for digest and MAC contexts created by
+ * skein_digest_init/skein_mac_init.  Produces the output and then wipes,
+ * frees and detaches the kmem-allocated context.  The context is kept
+ * only when the output buffer was too small, so the caller can retry with
+ * a bigger one.
+ */
+static int
+skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
+{
+	int	error = skein_final_nofree(ctx, digest, req);
+
+	if (error == CRYPTO_BUFFER_TOO_SMALL)
+		return (error);
+
+	bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+	kmem_free(SKEIN_CTX(ctx), sizeof (*(SKEIN_CTX(ctx))));
+	SKEIN_CTX_LVALUE(ctx) = NULL;
+
+	return (error);
+}
+
+/*
+ * Performs a full skein digest computation in a single call, configuring the
+ * algorithm according to `mechanism', reading the input to be digested from
+ * `data' and writing the output to `digest'.
+ * Supported input/output formats are raw and uio.
+ *
+ * The Skein state lives on this function's stack, so the non-freeing
+ * final helper is used and the state is zeroed before returning.
+ */
+/*ARGSUSED*/
+static int
+skein_digest_atomic(crypto_provider_handle_t provider,
+    crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+    crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req)
+{
+	int		error;
+	skein_ctx_t	skein_ctx;
+	crypto_ctx_t	ctx;
+	SKEIN_CTX_LVALUE(&ctx) = &skein_ctx;
+
+	/* Init */
+	if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type))
+		return (CRYPTO_MECHANISM_INVALID);
+	skein_ctx.sc_mech_type = mechanism->cm_type;
+	error = skein_get_digest_bitlen(mechanism, &skein_ctx.sc_digest_bitlen);
+	if (error != CRYPTO_SUCCESS)
+		goto out;
+	SKEIN_OP(&skein_ctx, Init, skein_ctx.sc_digest_bitlen);
+
+	/* Absorb the input from `data', then emit the result to `digest'. */
+	error = skein_update(&ctx, data, req);
+	if (error == CRYPTO_SUCCESS)
+		error = skein_final_nofree(&ctx, digest, req);
+
+out:
+	if (error == CRYPTO_SUCCESS)
+		digest->cd_length =
+		    CRYPTO_BITS2BYTES(skein_ctx.sc_digest_bitlen);
+	else
+		digest->cd_length = 0;
+	bzero(&skein_ctx, sizeof (skein_ctx));
+
+	return (error);
+}
+
+/*
+ * Helper function that builds a Skein MAC context from the provided
+ * mechanism and key.
+ *
+ * Returns CRYPTO_MECHANISM_INVALID if the mechanism is not a Skein MAC
+ * type, CRYPTO_ARGUMENTS_BAD if the key is not in raw format, or the
+ * error from skein_get_digest_bitlen().  `ctx' is fully initialized only
+ * when CRYPTO_SUCCESS is returned.
+ */
+static int
+skein_mac_ctx_build(skein_ctx_t *ctx, crypto_mechanism_t *mechanism,
+    crypto_key_t *key)
+{
+	int error;
+
+	if (!VALID_SKEIN_MAC_MECH(mechanism->cm_type))
+		return (CRYPTO_MECHANISM_INVALID);
+	if (key->ck_format != CRYPTO_KEY_RAW)
+		return (CRYPTO_ARGUMENTS_BAD);
+	ctx->sc_mech_type = mechanism->cm_type;
+	error = skein_get_digest_bitlen(mechanism, &ctx->sc_digest_bitlen);
+	if (error != CRYPTO_SUCCESS)
+		return (error);
+	/* ck_length is converted from bits to bytes for InitExt. */
+	SKEIN_OP(ctx, InitExt, ctx->sc_digest_bitlen, 0, key->ck_data,
+	    CRYPTO_BITS2BYTES(key->ck_length));
+
+	return (CRYPTO_SUCCESS);
+}
+
+/*
+ * KCF software provide mac entry points.
+ */
+/*
+ * Initializes a skein MAC context. You may pass a ctx_template, in which
+ * case the template will be reused to make initialization more efficient.
+ * Otherwise a new context will be constructed. The mechanism cm_type must
+ * be one of SKEIN_*_MAC_MECH_INFO_TYPE. Same as in skein_digest_init, you
+ * may pass a skein_param_t in cm_param to configure the length of the
+ * digest. The key must be in raw format.
+ *
+ * On failure no context is left attached to `ctx'.
+ */
+static int
+skein_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+    crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
+    crypto_req_handle_t req)
+{
+	int	error;
+
+	SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)),
+	    crypto_kmflag(req));
+	if (SKEIN_CTX(ctx) == NULL)
+		return (CRYPTO_HOST_MEMORY);
+
+	if (ctx_template != NULL) {
+		bcopy(ctx_template, SKEIN_CTX(ctx),
+		    sizeof (*SKEIN_CTX(ctx)));
+	} else {
+		error = skein_mac_ctx_build(SKEIN_CTX(ctx), mechanism, key);
+		if (error != CRYPTO_SUCCESS)
+			goto errout;
+	}
+
+	return (CRYPTO_SUCCESS);
+errout:
+	bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+	kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+	/* Detach the freed context so skein_free_context cannot free it again. */
+	SKEIN_CTX_LVALUE(ctx) = NULL;
+	return (error);
+}
+
+/*
+ * The MAC update and final calls are reused from the regular digest code.
+ */
+
+/*
+ * Same as skein_digest_atomic, performs an atomic Skein MAC operation in
+ * one step. All the same properties apply to the arguments of this
+ * function as to those of the partial operations above.
+ *
+ * The keyed Skein state lives on this function's stack, so the
+ * non-freeing final helper is used and the state is always zeroed before
+ * returning, on success as well as on failure.
+ */
+/*ARGSUSED*/
+static int
+skein_mac_atomic(crypto_provider_handle_t provider,
+    crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+    crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
+    crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
+{
+	/* faux crypto context just for skein_update/skein_final_nofree */
+	int		error;
+	crypto_ctx_t	ctx;
+	skein_ctx_t	skein_ctx;
+	SKEIN_CTX_LVALUE(&ctx) = &skein_ctx;
+
+	if (ctx_template != NULL) {
+		bcopy(ctx_template, &skein_ctx, sizeof (skein_ctx));
+	} else {
+		error = skein_mac_ctx_build(&skein_ctx, mechanism, key);
+		if (error != CRYPTO_SUCCESS)
+			goto out;
+	}
+
+	error = skein_update(&ctx, data, req);
+	if (error == CRYPTO_SUCCESS)
+		error = skein_final_nofree(&ctx, mac, req);
+
+out:
+	bzero(&skein_ctx, sizeof (skein_ctx));
+	return (error);
+}
+
+/*
+ * KCF software provider context management entry points.
+ */
+
+/*
+ * Constructs a context template for the Skein MAC algorithm. The same
+ * properties apply to the arguments of this function as to those of
+ * skein_mac_init.
+ *
+ * On success `*ctx_template' receives a kmem-allocated skein_ctx_t and
+ * `*ctx_template_size' its size; on failure neither is written.
+ */
+/*ARGSUSED*/
+static int
+skein_create_ctx_template(crypto_provider_handle_t provider,
+    crypto_mechanism_t *mechanism, crypto_key_t *key,
+    crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
+    crypto_req_handle_t req)
+{
+	int		error;
+	skein_ctx_t	*ctx_tmpl;
+
+	ctx_tmpl = kmem_alloc(sizeof (*ctx_tmpl), crypto_kmflag(req));
+	if (ctx_tmpl == NULL)
+		return (CRYPTO_HOST_MEMORY);
+	error = skein_mac_ctx_build(ctx_tmpl, mechanism, key);
+	if (error != CRYPTO_SUCCESS)
+		goto errout;
+	*ctx_template = ctx_tmpl;
+	*ctx_template_size = sizeof (*ctx_tmpl);
+
+	return (CRYPTO_SUCCESS);
+errout:
+	bzero(ctx_tmpl, sizeof (*ctx_tmpl));
+	kmem_free(ctx_tmpl, sizeof (*ctx_tmpl));
+	return (error);
+}
+
+/*
+ * Frees a skein context in a parent crypto context.
+ */
+static int
+skein_free_context(crypto_ctx_t *ctx)
+{
+	skein_ctx_t	*sc = SKEIN_CTX(ctx);
+
+	/* No provider-private context attached: nothing to do. */
+	if (sc == NULL)
+		return (CRYPTO_SUCCESS);
+
+	/* Zero the state before giving the memory back, then detach it. */
+	bzero(sc, sizeof (*sc));
+	kmem_free(sc, sizeof (*sc));
+	SKEIN_CTX_LVALUE(ctx) = NULL;
+
+	return (CRYPTO_SUCCESS);
+}
diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c
index 5436bae9a..8a975ecb3 100644
--- a/module/zcommon/zfs_fletcher.c
+++ b/module/zcommon/zfs_fletcher.c
@@ -23,6 +23,9 @@
  * Use is subject to license terms.
  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
  */
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
 
 /*
  * Fletcher Checksums
@@ -206,8 +209,10 @@ static struct fletcher_4_kstat {
 /* Indicate that benchmark has been completed */
 static boolean_t fletcher_4_initialized = B_FALSE;
 
+/*ARGSUSED*/
 void
-fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+fletcher_2_native(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
 {
 	const uint64_t *ip = buf;
 	const uint64_t *ipend = ip + (size / sizeof (uint64_t));
@@ -223,8 +228,10 @@ fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
 	ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
 }
 
+/*ARGSUSED*/
 void
-fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
+fletcher_2_byteswap(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
 {
 	const uint64_t *ip = buf;
 	const uint64_t *ipend = ip + (size / sizeof (uint64_t));
@@ -404,8 +411,10 @@ fletcher_4_native_impl(const fletcher_4_ops_t *ops, const void *buf,
 	ops->fini_native(zcp);
 }
 
+/*ARGSUSED*/
 void
-fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+fletcher_4_native(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
 {
 	const fletcher_4_ops_t *ops;
 	uint64_t p2size = P2ALIGN(size, 64);
@@ -443,8 +452,10 @@ fletcher_4_byteswap_impl(const fletcher_4_ops_t *ops, const void *buf,
 	ops->fini_byteswap(zcp);
 }
 
+/*ARGSUSED*/
 void
-fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) +fletcher_4_byteswap(const void *buf, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp) { const fletcher_4_ops_t *ops; uint64_t p2size = P2ALIGN(size, 64); @@ -551,7 +562,7 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) start = gethrtime(); do { for (l = 0; l < 32; l++, run_count++) - fletcher_4_test(data, data_size, &zc); + fletcher_4_test(data, data_size, NULL, &zc); run_time_ns = gethrtime() - start; } while (run_time_ns < FLETCHER_4_BENCH_NS); diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 1d68ca29e..029075ebe 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -70,6 +70,10 @@ zfs_prop_init(void) { "fletcher2", ZIO_CHECKSUM_FLETCHER_2 }, { "fletcher4", ZIO_CHECKSUM_FLETCHER_4 }, { "sha256", ZIO_CHECKSUM_SHA256 }, + { "noparity", ZIO_CHECKSUM_NOPARITY }, + { "sha512", ZIO_CHECKSUM_SHA512 }, + { "skein", ZIO_CHECKSUM_SKEIN }, + { "edonr", ZIO_CHECKSUM_EDONR }, { NULL } }; @@ -80,6 +84,14 @@ zfs_prop_init(void) { "sha256", ZIO_CHECKSUM_SHA256 }, { "sha256,verify", ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY }, + { "sha512", ZIO_CHECKSUM_SHA512 }, + { "sha512,verify", + ZIO_CHECKSUM_SHA512 | ZIO_CHECKSUM_VERIFY }, + { "skein", ZIO_CHECKSUM_SKEIN }, + { "skein,verify", + ZIO_CHECKSUM_SKEIN | ZIO_CHECKSUM_VERIFY }, + { "edonr,verify", + ZIO_CHECKSUM_EDONR | ZIO_CHECKSUM_VERIFY }, { NULL } }; @@ -241,12 +253,12 @@ zfs_prop_init(void) zprop_register_index(ZFS_PROP_CHECKSUM, "checksum", ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM", - checksum_table); + "on | off | fletcher2 | fletcher4 | sha256 | sha512 | " + "skein | edonr", "CHECKSUM", checksum_table); zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "on | off | verify | sha256[,verify]", "DEDUP", - 
dedup_table); + "on | off | verify | sha256[,verify], sha512[,verify], " + "skein[,verify], edonr,verify", "DEDUP", dedup_table); zprop_register_index(ZFS_PROP_COMPRESSION, "compression", ZIO_COMPRESS_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index e1771b233..ce368880c 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -36,6 +36,7 @@ $(MODULE)-objs += dsl_pool.o $(MODULE)-objs += dsl_prop.o $(MODULE)-objs += dsl_scan.o $(MODULE)-objs += dsl_synctask.o +$(MODULE)-objs += edonr_zfs.o $(MODULE)-objs += fm.o $(MODULE)-objs += gzip.o $(MODULE)-objs += lzjb.o @@ -49,6 +50,7 @@ $(MODULE)-objs += refcount.o $(MODULE)-objs += rrwlock.o $(MODULE)-objs += sa.o $(MODULE)-objs += sha256.o +$(MODULE)-objs += skein_zfs.o $(MODULE)-objs += spa.o $(MODULE)-objs += spa_boot.o $(MODULE)-objs += spa_config.o diff --git a/module/zfs/arc.c b/module/zfs/arc.c index bf078aa94..7bae2c42d 100755 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1382,7 +1382,7 @@ arc_cksum_verify(arc_buf_t *buf) return; } - fletcher_2_native(buf->b_data, arc_buf_size(buf), &zc); + fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL, &zc); if (!ZIO_CHECKSUM_EQUAL(*hdr->b_l1hdr.b_freeze_cksum, zc)) panic("buffer modified while frozen!"); mutex_exit(&hdr->b_l1hdr.b_freeze_lock); @@ -1495,7 +1495,7 @@ arc_cksum_compute(arc_buf_t *buf) ASSERT(!ARC_BUF_COMPRESSED(buf)); hdr->b_l1hdr.b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP); - fletcher_2_native(buf->b_data, arc_buf_size(buf), + fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL, hdr->b_l1hdr.b_freeze_cksum); mutex_exit(&hdr->b_l1hdr.b_freeze_lock); arc_buf_watch(buf); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index e487e469f..2ec41fb51 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -3814,7 +3814,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite); 
mutex_exit(&db->db_mtx); } else if (db->db_state == DB_NOFILL) { - ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF); + ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF || + zp.zp_checksum == ZIO_CHECKSUM_NOPARITY); dr->dr_zio = zio_write(zio, os->os_spa, txg, &dr->dr_bp_copy, NULL, db->db.db_size, db->db.db_size, &zp, dbuf_write_nofill_ready, NULL, NULL, diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c index 7d2383968..09a3536f5 100644 --- a/module/zfs/ddt.c +++ b/module/zfs/ddt.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. */ #include @@ -62,7 +62,8 @@ ddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class, spa_t *spa = ddt->ddt_spa; objset_t *os = ddt->ddt_os; uint64_t *objectp = &ddt->ddt_object[type][class]; - boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_dedup; + boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_flags & + ZCHECKSUM_FLAG_DEDUP; char name[DDT_NAMELEN]; ddt_object_name(ddt, type, class, name); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index d2f4aac98..80185706c 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1445,7 +1445,8 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) ASSERT(BP_EQUAL(bp, bp_orig)); ASSERT(zio->io_prop.zp_compress != ZIO_COMPRESS_OFF); - ASSERT(zio_checksum_table[chksum].ci_dedup); + ASSERT(zio_checksum_table[chksum].ci_flags & + ZCHECKSUM_FLAG_NOPWRITE); } dr->dt.dl.dr_overridden_by = *zio->io_bp; dr->dt.dl.dr_override_state = DR_OVERRIDDEN; @@ -1792,8 +1793,10 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, * as well. Otherwise, the metadata checksum defaults * to fletcher4. 
*/ - if (zio_checksum_table[checksum].ci_correctable < 1 || - zio_checksum_table[checksum].ci_eck) + if (!(zio_checksum_table[checksum].ci_flags & + ZCHECKSUM_FLAG_METADATA) || + (zio_checksum_table[checksum].ci_flags & + ZCHECKSUM_FLAG_EMBEDDED)) checksum = ZIO_CHECKSUM_FLETCHER_4; if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL || @@ -1832,17 +1835,20 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, */ if (dedup_checksum != ZIO_CHECKSUM_OFF) { dedup = (wp & WP_DMU_SYNC) ? B_FALSE : B_TRUE; - if (!zio_checksum_table[checksum].ci_dedup) + if (!(zio_checksum_table[checksum].ci_flags & + ZCHECKSUM_FLAG_DEDUP)) dedup_verify = B_TRUE; } /* - * Enable nopwrite if we have a cryptographically secure - * checksum that has no known collisions (i.e. SHA-256) - * and compression is enabled. We don't enable nopwrite if - * dedup is enabled as the two features are mutually exclusive. + * Enable nopwrite if we have secure enough checksum + * algorithm (see comment in zio_nop_write) and + * compression is enabled. We don't enable nopwrite if + * dedup is enabled as the two features are mutually + * exclusive. 
*/ - nopwrite = (!dedup && zio_checksum_table[checksum].ci_dedup && + nopwrite = (!dedup && (zio_checksum_table[checksum].ci_flags & + ZCHECKSUM_FLAG_NOPWRITE) && compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled); } diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 5e95da52d..f9414ea3a 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -346,7 +346,8 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, drrw->drr_checksumtype = ZIO_CHECKSUM_OFF; } else { drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); - if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) + if (zio_checksum_table[drrw->drr_checksumtype].ci_flags & + ZCHECKSUM_FLAG_DEDUP) drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 3026d8733..9362d49bd 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -56,6 +56,7 @@ #include #include #include +#include /* * The SPA supports block sizes up to 16MB. 
However, very large blocks @@ -108,6 +109,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) { int used, compressed, uncompressed; int64_t delta; + spa_feature_t f; used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); compressed = BP_GET_PSIZE(bp); @@ -134,10 +136,16 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) dsl_dataset_phys(ds)->ds_compressed_bytes += compressed; dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed; dsl_dataset_phys(ds)->ds_unique_bytes += used; + if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) { ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_BLOCKS] = B_TRUE; } + + f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp)); + if (f != SPA_FEATURE_NONE) + ds->ds_feature_activation_needed[f] = B_TRUE; + mutex_exit(&ds->ds_lock); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, compressed, uncompressed, tx); diff --git a/module/zfs/edonr_zfs.c b/module/zfs/edonr_zfs.c new file mode 100644 index 000000000..3c7d98656 --- /dev/null +++ b/module/zfs/edonr_zfs.c @@ -0,0 +1,103 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2013 Saso Kiselkov. All rights reserved. 
+ * Use is subject to license terms. + */ +#include +#include +#include +#include /* For CTASSERT() */ + +#define EDONR_MODE 512 +#define EDONR_BLOCK_SIZE EdonR512_BLOCK_SIZE + +/* + * Native zio_checksum interface for the Edon-R hash function. + */ +/*ARGSUSED*/ +void +zio_checksum_edonr_native(const void *buf, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp) +{ + uint8_t digest[EDONR_MODE / 8]; + EdonRState ctx; + + ASSERT(ctx_template != NULL); + bcopy(ctx_template, &ctx, sizeof (ctx)); + EdonRUpdate(&ctx, buf, size * 8); + EdonRFinal(&ctx, digest); + bcopy(digest, zcp->zc_word, sizeof (zcp->zc_word)); +} + +/* + * Byteswapped Edon-R checksum: byteswaps the native digest words. + */ +void +zio_checksum_edonr_byteswap(const void *buf, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp) +{ + zio_cksum_t tmp; + + zio_checksum_edonr_native(buf, size, ctx_template, &tmp); + zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]); + zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]); + zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]); + zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]); +} + +void * +zio_checksum_edonr_tmpl_init(const zio_cksum_salt_t *salt) +{ + EdonRState *ctx; + uint8_t salt_block[EDONR_BLOCK_SIZE]; + + /* + * Edon-R needs all but the last hash invocation to be on full-size + * blocks, but the salt is too small. Rather than simply padding it + * with zeros, we expand the salt into a new salt block of proper + * size by double-hashing it (the new salt block will be composed of + * H(salt) || H(H(salt))). + */ + CTASSERT(EDONR_BLOCK_SIZE == 2 * (EDONR_MODE / 8)); + EdonRHash(EDONR_MODE, salt->zcs_bytes, sizeof (salt->zcs_bytes) * 8, + salt_block); + EdonRHash(EDONR_MODE, salt_block, EDONR_MODE, salt_block + + EDONR_MODE / 8); + + /* + * Feed the new salt block into the hash function - this will serve + * as our MAC key.
+ */ + ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP); + EdonRInit(ctx, EDONR_MODE); + EdonRUpdate(ctx, salt_block, sizeof (salt_block) * 8); + return (ctx); +} + +void +zio_checksum_edonr_tmpl_free(void *ctx_template) +{ + EdonRState *ctx = ctx_template; + + bzero(ctx, sizeof (*ctx)); + kmem_free(ctx, sizeof (*ctx)); +} diff --git a/module/zfs/sha256.c b/module/zfs/sha256.c index 57f5b7daf..c8a4882f8 100644 --- a/module/zfs/sha256.c +++ b/module/zfs/sha256.c @@ -19,110 +19,64 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ - +/* + * Copyright 2013 Saso Kiselkov. All rights reserved. + */ #include #include #include +#include -/* - * SHA-256 checksum, as specified in FIPS 180-3, available at: - * http://csrc.nist.gov/publications/PubsFIPS.html - * - * This is a very compact implementation of SHA-256. - * It is designed to be simple and portable, not to be fast. - */ - -/* - * The literal definitions of Ch() and Maj() according to FIPS 180-3 are: - * - * Ch(x, y, z) (x & y) ^ (~x & z) - * Maj(x, y, z) (x & y) ^ (x & z) ^ (y & z) - * - * We use equivalent logical reductions here that require one less op. 
- */ -#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -#define Maj(x, y, z) (((x) & (y)) ^ ((z) & ((x) ^ (y)))) -#define Rot32(x, s) (((x) >> s) | ((x) << (32 - s))) -#define SIGMA0(x) (Rot32(x, 2) ^ Rot32(x, 13) ^ Rot32(x, 22)) -#define SIGMA1(x) (Rot32(x, 6) ^ Rot32(x, 11) ^ Rot32(x, 25)) -#define sigma0(x) (Rot32(x, 7) ^ Rot32(x, 18) ^ ((x) >> 3)) -#define sigma1(x) (Rot32(x, 17) ^ Rot32(x, 19) ^ ((x) >> 10)) - -static const uint32_t SHA256_K[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -static void -SHA256Transform(uint32_t *H, const uint8_t *cp) -{ - uint32_t a, b, c, d, e, f, g, h, t, T1, T2, W[64]; - - for (t = 0; t < 16; t++, cp += 4) - W[t] = ((uint32_t)cp[0] << 24) | ((uint32_t)cp[1] << 16) | - ((uint32_t)cp[2] << 8) | (uint32_t)cp[3]; - - for (t = 16; t < 64; t++) - W[t] = sigma1(W[t - 2]) + W[t - 7] + - sigma0(W[t - 15]) + W[t - 16]; - - a = H[0]; b = H[1]; c = H[2]; d = H[3]; - e = H[4]; f = H[5]; g = H[6]; h = H[7]; - - for (t = 0; t < 64; t++) { - T1 = h + SIGMA1(e) + Ch(e, f, g) + SHA256_K[t] + W[t]; - T2 = SIGMA0(a) + Maj(a, b, c); - h = g; g = f; f = e; e = d + T1; - d = c; c = b; b = a; a = T1 + T2; - } - - H[0] += a; H[1] += b; H[2] += c; H[3] += d; - H[4] += e; H[5] += f; H[6] += g; H[7] += 
h; -} - +/*ARGSUSED*/ void -zio_checksum_SHA256(const void *buf, uint64_t size, zio_cksum_t *zcp) +zio_checksum_SHA256(const void *buf, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp) { - uint32_t H[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, - 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 }; - uint8_t pad[128]; - int i, padsize; + SHA2_CTX ctx; + zio_cksum_t tmp; - for (i = 0; i < (size & ~63ULL); i += 64) - SHA256Transform(H, (uint8_t *)buf + i); + SHA2Init(SHA256, &ctx); + SHA2Update(&ctx, buf, size); + SHA2Final(&tmp, &ctx); - for (padsize = 0; i < size; i++) - pad[padsize++] = *((uint8_t *)buf + i); - - for (pad[padsize++] = 0x80; (padsize & 63) != 56; padsize++) - pad[padsize] = 0; - - for (i = 56; i >= 0; i -= 8) - pad[padsize++] = (size << 3) >> i; - - for (i = 0; i < padsize; i += 64) - SHA256Transform(H, pad + i); - - ZIO_SET_CHECKSUM(zcp, - (uint64_t)H[0] << 32 | H[1], - (uint64_t)H[2] << 32 | H[3], - (uint64_t)H[4] << 32 | H[5], - (uint64_t)H[6] << 32 | H[7]); + /* + * A prior implementation of this function had a + * private SHA256 implementation that always wrote things out + * in Big Endian, and there wasn't a byteswap variant of it. + * To preserve on-disk compatibility we need to force that + * behaviour.
+ */ + zcp->zc_word[0] = BE_64(tmp.zc_word[0]); + zcp->zc_word[1] = BE_64(tmp.zc_word[1]); + zcp->zc_word[2] = BE_64(tmp.zc_word[2]); + zcp->zc_word[3] = BE_64(tmp.zc_word[3]); +} + +/*ARGSUSED*/ +void +zio_checksum_SHA512_native(const void *buf, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp) +{ + SHA2_CTX ctx; + + SHA2Init(SHA512_256, &ctx); + SHA2Update(&ctx, buf, size); + SHA2Final(zcp, &ctx); +} + +/*ARGSUSED*/ +void +zio_checksum_SHA512_byteswap(const void *buf, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp) +{ + zio_cksum_t tmp; + + zio_checksum_SHA512_native(buf, size, ctx_template, &tmp); + zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]); + zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]); + zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]); + zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]); } diff --git a/module/zfs/skein_zfs.c b/module/zfs/skein_zfs.c new file mode 100644 index 000000000..659234039 --- /dev/null +++ b/module/zfs/skein_zfs.c @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2013 Saso Kiselkov. All rights reserved. + */ +#include +#include +#include + +/* + * Computes a native 256-bit skein MAC checksum. 
Please note that this + * function requires the presence of a ctx_template that should be allocated + * using zio_checksum_skein_tmpl_init. + */ +/*ARGSUSED*/ +void +zio_checksum_skein_native(const void *buf, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp) +{ + Skein_512_Ctxt_t ctx; + + ASSERT(ctx_template != NULL); + bcopy(ctx_template, &ctx, sizeof (ctx)); + (void) Skein_512_Update(&ctx, buf, size); + (void) Skein_512_Final(&ctx, (uint8_t *)zcp); + bzero(&ctx, sizeof (ctx)); +} + +/* + * Byteswapped version of zio_checksum_skein_native. This just invokes + * the native checksum function and byteswaps the resulting checksum (since + * skein is internally endian-insensitive). + */ +void +zio_checksum_skein_byteswap(const void *buf, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp) +{ + zio_cksum_t tmp; + + zio_checksum_skein_native(buf, size, ctx_template, &tmp); + zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]); + zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]); + zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]); + zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]); +} + +/* + * Allocates a skein MAC template suitable for using in skein MAC checksum + * computations and returns a pointer to it. + */ +void * +zio_checksum_skein_tmpl_init(const zio_cksum_salt_t *salt) +{ + Skein_512_Ctxt_t *ctx; + + ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP); + (void) Skein_512_InitExt(ctx, sizeof (zio_cksum_t) * 8, 0, + salt->zcs_bytes, sizeof (salt->zcs_bytes)); + return (ctx); +} + +/* + * Frees a skein context template previously allocated using + * zio_checksum_skein_tmpl_init. + */ +void +zio_checksum_skein_tmpl_free(void *ctx_template) +{ + Skein_512_Ctxt_t *ctx = ctx_template; + + bzero(ctx, sizeof (*ctx)); + kmem_free(ctx, sizeof (*ctx)); +} diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 3264bfb10..c2f914e11 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -25,6 +25,7 @@ * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved. 
* Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. */ @@ -2675,6 +2676,19 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE)); } + /* Grab the checksum salt from the MOS. */ + error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_CHECKSUM_SALT, 1, + sizeof (spa->spa_cksum_salt.zcs_bytes), + spa->spa_cksum_salt.zcs_bytes); + if (error == ENOENT) { + /* Generate a new salt for subsequent use */ + (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, + sizeof (spa->spa_cksum_salt.zcs_bytes)); + } else if (error != 0) { + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + } + if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); @@ -3929,6 +3943,12 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, if (version >= SPA_VERSION_ZPOOL_HISTORY) spa_history_create_obj(spa, tx); + /* + * Generate some random noise for salted checksums to operate on. + */ + (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, + sizeof (spa->spa_cksum_salt.zcs_bytes)); + /* * Set pool properties. */ @@ -6406,6 +6426,20 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) if (lz4_en && !lz4_ac) spa_feature_incr(spa, SPA_FEATURE_LZ4_COMPRESS, tx); } + + /* + * If we haven't written the salt, do so now. Note that the + * feature may not be activated yet, but that's fine since + * the presence of this ZAP entry is backwards compatible. 
+ */ + if (zap_contains(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_CHECKSUM_SALT) == ENOENT) { + VERIFY0(zap_add(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1, + sizeof (spa->spa_cksum_salt.zcs_bytes), + spa->spa_cksum_salt.zcs_bytes, tx)); + } + rrw_exit(&dp->dp_config_rwlock, FTAG); } diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 6330a6a6b..595e594ca 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -23,6 +23,7 @@ * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + * Copyright 2013 Saso Kiselkov. All rights reserved. */ #include @@ -53,7 +54,7 @@ #include #include #include "zfs_prop.h" -#include "zfeature_common.h" +#include /* * SPA locking @@ -558,6 +559,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_cksum_tmpls_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL); @@ -686,6 +688,8 @@ spa_remove(spa_t *spa) for (t = 0; t < TXG_SIZE; t++) bplist_destroy(&spa->spa_free_bplist[t]); + zio_checksum_templates_free(spa); + cv_destroy(&spa->spa_async_cv); cv_destroy(&spa->spa_evicting_os_cv); cv_destroy(&spa->spa_proc_cv); @@ -699,6 +703,7 @@ spa_remove(spa_t *spa) mutex_destroy(&spa->spa_history_lock); mutex_destroy(&spa->spa_proc_lock); mutex_destroy(&spa->spa_props_lock); + mutex_destroy(&spa->spa_cksum_tmpls_lock); mutex_destroy(&spa->spa_scrub_lock); mutex_destroy(&spa->spa_suspend_lock); mutex_destroy(&spa->spa_vdev_top_lock); diff --git 
a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index f5df2c7d8..d1b415367 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -1604,6 +1604,13 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) int c, ret = 0; raidz_col_t *rc; + blkptr_t *bp = zio->io_bp; + enum zio_checksum checksum = (bp == NULL ? zio->io_prop.zp_checksum : + (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp))); + + if (checksum == ZIO_CHECKSUM_NOPARITY) + return (ret); + for (c = 0; c < rm->rm_firstdatacol; c++) { rc = &rm->rm_col[c]; if (!rc->rc_tried || rc->rc_error != 0) diff --git a/module/zfs/zfeature_common.c b/module/zfs/zfeature_common.c index 3264f6235..e8b0a16ae 100644 --- a/module/zfs/zfeature_common.c +++ b/module/zfs/zfeature_common.c @@ -253,4 +253,16 @@ zpool_feature_init(void) "Variable on-disk size of dnodes.", ZFEATURE_FLAG_PER_DATASET, large_dnode_deps); } + zfeature_register(SPA_FEATURE_SHA512, + "org.illumos:sha512", "sha512", + "SHA-512/256 hash algorithm.", + ZFEATURE_FLAG_PER_DATASET, NULL); + zfeature_register(SPA_FEATURE_SKEIN, + "org.illumos:skein", "skein", + "Skein hash algorithm.", + ZFEATURE_FLAG_PER_DATASET, NULL); + zfeature_register(SPA_FEATURE_EDONR, + "org.illumos:edonr", "edonr", + "Edon-R hash algorithm.", + ZFEATURE_FLAG_PER_DATASET, NULL); } diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 64f630108..9140c62a6 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -185,6 +185,7 @@ #include #include #include +#include #include #include @@ -3809,11 +3810,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) return (SET_ERROR(ENOTSUP)); break; - case ZFS_PROP_DEDUP: - if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP)) - return (SET_ERROR(ENOTSUP)); - break; - case ZFS_PROP_VOLBLOCKSIZE: case ZFS_PROP_RECORDSIZE: /* Record sizes above 128k need the feature to be enabled */ @@ -3893,6 +3889,47 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) return 
(SET_ERROR(ENOTSUP)); } break; + case ZFS_PROP_CHECKSUM: + case ZFS_PROP_DEDUP: + { + spa_feature_t feature; + spa_t *spa; + uint64_t intval; + int err; + + /* dedup feature version checks */ + if (prop == ZFS_PROP_DEDUP && + zfs_earlier_version(dsname, SPA_VERSION_DEDUP)) + return (SET_ERROR(ENOTSUP)); + + if (nvpair_value_uint64(pair, &intval) != 0) + return (SET_ERROR(EINVAL)); + + /* check prop value (sans verify bit) is enabled in features */ + feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK); + if (feature == SPA_FEATURE_NONE) + break; + + if ((err = spa_open(dsname, &spa, FTAG)) != 0) + return (err); + /* + * Salted checksums are not supported on root pools. + */ + if (spa_bootfs(spa) != 0 && + (intval & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS && + (zio_checksum_table[intval & ZIO_CHECKSUM_MASK].ci_flags & + ZCHECKSUM_FLAG_SALTED)) { + spa_close(spa, FTAG); + return (SET_ERROR(ERANGE)); + } + if (!spa_feature_is_enabled(spa, feature)) { + spa_close(spa, FTAG); + return (SET_ERROR(ENOTSUP)); + } + spa_close(spa, FTAG); + break; + } + default: break; } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index e26822e34..8a063ab7f 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -979,7 +979,7 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, zio->io_prop.zp_checksum = checksum; - if (zio_checksum_table[checksum].ci_eck) { + if (zio_checksum_table[checksum].ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { /* * zec checksums are necessarily destructive -- they modify * the end of the write buffer to hold the verifier/checksum.
@@ -1190,8 +1190,8 @@ zio_write_bp_init(zio_t *zio) if (BP_IS_HOLE(bp) || !zp->zp_dedup) return (ZIO_PIPELINE_CONTINUE); - ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup || - zp->zp_dedup_verify); + ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags & + ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify); if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) { BP_SET_DEDUP(bp, 1); @@ -2198,12 +2198,22 @@ zio_write_gang_block(zio_t *pio) } /* - * The zio_nop_write stage in the pipeline determines if allocating - * a new bp is necessary. By leveraging a cryptographically secure checksum, - * such as SHA256, we can compare the checksums of the new data and the old - * to determine if allocating a new block is required. The nopwrite - * feature can handle writes in either syncing or open context (i.e. zil - * writes) and as a result is mutually exclusive with dedup. + * The zio_nop_write stage in the pipeline determines if allocating a + * new bp is necessary. The nopwrite feature can handle writes in + * either syncing or open context (i.e. zil writes) and as a result is + * mutually exclusive with dedup. + * + * By leveraging a cryptographically secure checksum, such as SHA256, we + * can compare the checksums of the new data and the old to determine if + * allocating a new block is required. Note that our requirements for + * cryptographic strength are fairly weak: there can't be any accidental + * hash collisions, but we don't need to be secure against intentional + * (malicious) collisions. To trigger a nopwrite, you have to be able + * to write the file to begin with, and triggering an incorrect (hash + * collision) nopwrite is no worse than simply writing to the file. + * That said, there are no known attacks against the checksum algorithms + * used for nopwrite, assuming that the salt and the checksums + * themselves remain secret. */ static int zio_nop_write(zio_t *zio) @@ -2226,7 +2236,8 @@ zio_nop_write(zio_t *zio) * allocate a new bp. 
*/ if (BP_IS_HOLE(bp_orig) || - !zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_dedup || + !(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags & + ZCHECKSUM_FLAG_NOPWRITE) || BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) || BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) || BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) || @@ -2238,7 +2249,8 @@ zio_nop_write(zio_t *zio) * avoid allocating a new bp and issuing any I/O. */ if (ZIO_CHECKSUM_EQUAL(bp->blk_cksum, bp_orig->blk_cksum)) { - ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup); + ASSERT(zio_checksum_table[zp->zp_checksum].ci_flags & + ZCHECKSUM_FLAG_NOPWRITE); ASSERT3U(BP_GET_PSIZE(bp), ==, BP_GET_PSIZE(bp_orig)); ASSERT3U(BP_GET_LSIZE(bp), ==, BP_GET_LSIZE(bp_orig)); ASSERT(zp->zp_compress != ZIO_COMPRESS_OFF); @@ -2566,7 +2578,8 @@ zio_ddt_write(zio_t *zio) * we can't resolve it, so just convert to an ordinary write. * (And automatically e-mail a paper to Nature?) */ - if (!zio_checksum_table[zp->zp_checksum].ci_dedup) { + if (!(zio_checksum_table[zp->zp_checksum].ci_flags & + ZCHECKSUM_FLAG_DEDUP)) { zp->zp_checksum = spa_dedup_checksum(spa); zio_pop_transforms(zio); zio->io_stage = ZIO_STAGE_OPEN; diff --git a/module/zfs/zio_checksum.c b/module/zfs/zio_checksum.c index b05e787dc..59871c50e 100644 --- a/module/zfs/zio_checksum.c +++ b/module/zfs/zio_checksum.c @@ -21,10 +21,12 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright 2013 Saso Kiselkov. All rights reserved. */ #include #include +#include #include #include #include @@ -58,28 +60,96 @@ * checksum function of the appropriate strength. When reading a block, * we compare the expected checksum against the actual checksum, which we * compute via the checksum function specified by BP_GET_CHECKSUM(bp). + * + * SALTED CHECKSUMS + * + * To enable the use of less secure hash algorithms with dedup, we + * introduce the notion of salted checksums (MACs, really). 
A salted + * checksum is fed both a random 256-bit value (the salt) and the data + * to be checksummed. This salt is kept secret (stored on the pool, but + * never shown to the user). Thus even if an attacker knew of collision + * weaknesses in the hash algorithm, they won't be able to mount a known + * plaintext attack on the DDT, since the actual hash value cannot be + * known ahead of time. How the salt is used is algorithm-specific + * (some might simply prefix it to the data block, others might need to + * utilize a full-blown HMAC). On disk the salt is stored in a ZAP + * object in the MOS (DMU_POOL_CHECKSUM_SALT). + * + * CONTEXT TEMPLATES + * + * Some hashing algorithms need to perform a substantial amount of + * initialization work (e.g. salted checksums above may need to pre-hash + * the salt) before being able to process data. Performing this + * redundant work for each block would be wasteful, so we instead allow + * a checksum algorithm to do the work once (the first time it's used) + * and then keep this pre-initialized context as a template inside the + * spa_t (spa_cksum_tmpls). If the zio_checksum_info_t contains + * non-NULL ci_tmpl_init and ci_tmpl_free callbacks, they are used to + * construct and destruct the pre-initialized checksum context. The + * pre-initialized context is then reused during each checksum + * invocation and passed to the checksum function. 
*/ /*ARGSUSED*/ static void -zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) +zio_checksum_off(const void *buf, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp) { ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); } zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { - {{NULL, NULL}, 0, 0, 0, "inherit"}, - {{NULL, NULL}, 0, 0, 0, "on"}, - {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "off"}, - {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "label"}, - {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "gang_header"}, - {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, 0, "zilog"}, - {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, 0, "fletcher2"}, - {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"}, - {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"}, - {{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"}, + {{NULL, NULL}, NULL, NULL, 0, "inherit"}, + {{NULL, NULL}, NULL, NULL, 0, "on"}, + {{zio_checksum_off, zio_checksum_off}, + NULL, NULL, 0, "off"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, + NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, + "label"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, + NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, + "gang_header"}, + {{fletcher_2_native, fletcher_2_byteswap}, + NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog"}, + {{fletcher_2_native, fletcher_2_byteswap}, + NULL, NULL, 0, "fletcher2"}, + {{fletcher_4_native, fletcher_4_byteswap}, + NULL, NULL, ZCHECKSUM_FLAG_METADATA, "fletcher4"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, + NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | + ZCHECKSUM_FLAG_NOPWRITE, "sha256"}, + {{fletcher_4_native, fletcher_4_byteswap}, + NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog2"}, + {{zio_checksum_off, zio_checksum_off}, + NULL, NULL, 0, "noparity"}, + {{zio_checksum_SHA512_native, zio_checksum_SHA512_byteswap}, + NULL, NULL, ZCHECKSUM_FLAG_METADATA | 
ZCHECKSUM_FLAG_DEDUP | + ZCHECKSUM_FLAG_NOPWRITE, "sha512"}, + {{zio_checksum_skein_native, zio_checksum_skein_byteswap}, + zio_checksum_skein_tmpl_init, zio_checksum_skein_tmpl_free, + ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | + ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "skein"}, + {{zio_checksum_edonr_native, zio_checksum_edonr_byteswap}, + zio_checksum_edonr_tmpl_init, zio_checksum_edonr_tmpl_free, + ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED | + ZCHECKSUM_FLAG_NOPWRITE, "edonr"}, }; +spa_feature_t +zio_checksum_to_feature(enum zio_checksum cksum) +{ + switch (cksum) { + case ZIO_CHECKSUM_SHA512: + return (SPA_FEATURE_SHA512); + case ZIO_CHECKSUM_SKEIN: + return (SPA_FEATURE_SKEIN); + case ZIO_CHECKSUM_EDONR: + return (SPA_FEATURE_EDONR); + default: + return (SPA_FEATURE_NONE); + } +} + enum zio_checksum zio_checksum_select(enum zio_checksum child, enum zio_checksum parent) { @@ -113,7 +183,8 @@ zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child, if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY)) return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY); - ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup || + ASSERT((zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_flags & + ZCHECKSUM_FLAG_DEDUP) || (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF); return (child); @@ -145,6 +216,30 @@ zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset) ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0); } +/* + * Calls the template init function of a checksum which supports context + * templates and installs the template into the spa_t. 
+ */ +static void +zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa) +{ + zio_checksum_info_t *ci = &zio_checksum_table[checksum]; + + if (ci->ci_tmpl_init == NULL) + return; + if (spa->spa_cksum_tmpls[checksum] != NULL) + return; + + VERIFY(ci->ci_tmpl_free != NULL); + mutex_enter(&spa->spa_cksum_tmpls_lock); + if (spa->spa_cksum_tmpls[checksum] == NULL) { + spa->spa_cksum_tmpls[checksum] = + ci->ci_tmpl_init(&spa->spa_cksum_salt); + VERIFY(spa->spa_cksum_tmpls[checksum] != NULL); + } + mutex_exit(&spa->spa_cksum_tmpls_lock); +} + /* * Generate the checksum. */ @@ -156,11 +251,14 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, uint64_t offset = zio->io_offset; zio_checksum_info_t *ci = &zio_checksum_table[checksum]; zio_cksum_t cksum; + spa_t *spa = zio->io_spa; ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); ASSERT(ci->ci_func[0] != NULL); - if (ci->ci_eck) { + zio_checksum_template_init(checksum, spa); + + if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { zio_eck_t *eck; if (checksum == ZIO_CHECKSUM_ZILOG2) { @@ -179,10 +277,12 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, else bp->blk_cksum = eck->zec_cksum; eck->zec_magic = ZEC_MAGIC; - ci->ci_func[0](data, size, &cksum); + ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum], + &cksum); eck->zec_cksum = cksum; } else { - ci->ci_func[0](data, size, &bp->blk_cksum); + ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum], + &bp->blk_cksum); } } @@ -191,13 +291,15 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, void *data, uint64_t size, uint64_t offset, zio_bad_cksum_t *info) { zio_checksum_info_t *ci = &zio_checksum_table[checksum]; - zio_cksum_t actual_cksum, expected_cksum; int byteswap; + zio_cksum_t actual_cksum, expected_cksum; if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) return (SET_ERROR(EINVAL)); - if (ci->ci_eck) { + zio_checksum_template_init(checksum, spa); + + if (ci->ci_flags & 
ZCHECKSUM_FLAG_EMBEDDED) { zio_eck_t *eck; zio_cksum_t verifier; @@ -235,7 +337,8 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, expected_cksum = eck->zec_cksum; eck->zec_cksum = verifier; - ci->ci_func[byteswap](data, size, &actual_cksum); + ci->ci_func[byteswap](data, size, + spa->spa_cksum_tmpls[checksum], &actual_cksum); eck->zec_cksum = expected_cksum; if (byteswap) { @@ -245,7 +348,8 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, } else { byteswap = BP_SHOULD_BYTESWAP(bp); expected_cksum = bp->blk_cksum; - ci->ci_func[byteswap](data, size, &actual_cksum); + ci->ci_func[byteswap](data, size, + spa->spa_cksum_tmpls[checksum], &actual_cksum); } if (info != NULL) { @@ -286,3 +390,24 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) } return (error); } + +/* + * Called by a spa_t that's about to be deallocated. This steps through + * all of the checksum context templates and deallocates any that were + * initialized using the algorithm-specific template init function. + */ +void +zio_checksum_templates_free(spa_t *spa) +{ + enum zio_checksum checksum; + for (checksum = 0; checksum < ZIO_CHECKSUM_FUNCTIONS; + checksum++) { + if (spa->spa_cksum_tmpls[checksum] != NULL) { + zio_checksum_info_t *ci = &zio_checksum_table[checksum]; + + VERIFY(ci->ci_tmpl_free != NULL); + ci->ci_tmpl_free(spa->spa_cksum_tmpls[checksum]); + spa->spa_cksum_tmpls[checksum] = NULL; + } + } +} diff --git a/scripts/zfs2zol-patch.sed b/scripts/zfs2zol-patch.sed index e6fc5c8b0..d4def4429 100755 --- a/scripts/zfs2zol-patch.sed +++ b/scripts/zfs2zol-patch.sed @@ -19,3 +19,22 @@ s:usr/src/test/zfs-tests/runfiles:tests/runfiles:g s:usr/src/test/zfs-tests/tests/functional:tests/zfs-tests/tests/functional:g s:usr/src/test/zfs-tests/tests/perf:tests/zfs-tests/tests/perf:g s:usr/src/test/test-runner/cmd/run.py:tests/test-runner/cmd/test-runner.py:g + +# +# The usr/src/common/zfs/ files go in a couple different dirs. 
+# usr/src/common/zfs/zfeature_common.c goes in module/zfs +# +s:usr/src/common/zfs/zfeature_common.c:module/zfs/zfeature_common.c:g + +# ...but most of the rest of the C files go in module/zcommon +s/usr\/src\/common\/zfs\/\(.*\)\.c/module\/zcommon\/\1.c/g + +# crypto framework +s:usr/src/common/crypto:module/icp/algs:g +s:usr/src/uts/common/crypto/io:module/icp/io:g + +# Headers +s:usr/src/common/zfs/\(.*\)\.h:include/\1.h:g + +# Man pages +s:usr/src/man:man:g diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 9a85af5d6..92f867ab9 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -60,6 +60,9 @@ tests = ['cache_002_pos', 'cache_003_pos', 'cache_004_neg', [tests/functional/casenorm] tests = ['case_all_values', 'norm_all_values'] +[tests/functional/checksum] +tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test', 'filetest_001_pos'] + [tests/functional/clean_mirror] tests = [ 'clean_mirror_001_pos', 'clean_mirror_002_pos', 'clean_mirror_003_pos', 'clean_mirror_004_pos'] diff --git a/tests/zfs-tests/cmd/file_write/file_write.c b/tests/zfs-tests/cmd/file_write/file_write.c index 046794820..81fc5de39 100644 --- a/tests/zfs-tests/cmd/file_write/file_write.c +++ b/tests/zfs-tests/cmd/file_write/file_write.c @@ -30,6 +30,9 @@ #include #include #include +#include +#include +#include typedef unsigned char uchar_t; typedef long long longlong_t; @@ -44,6 +47,16 @@ static unsigned char bigbuffer[BIGBUFFERSIZE]; static void usage(char *); +/* + * pseudo-randomize the buffer + */ +void randomize_buffer(int block_size) { + int i; + char rnd = rand() & 0xff; + for (i = 0; i < block_size; i++) + bigbuffer[i] ^= rnd; +} + int main(int argc, char **argv) { @@ -81,7 +94,10 @@ main(int argc, char **argv) write_count = atoi(optarg); break; case 'd': - fillchar = atoi(optarg); + if (optarg[0] == 'R') + fillchar = 'R'; /* R = random data */ + else + fillchar = atoi(optarg); break; case 's': offset = atoll(optarg); @@ -138,6 +154,9 @@
main(int argc, char **argv) nxtfillchar = fillchar; k = 0; + if (fillchar == 'R') + srand(time(NULL)); + for (i = 0; i < block_size; i++) { bigbuffer[i] = nxtfillchar; @@ -146,6 +165,8 @@ main(int argc, char **argv) k = 0; } nxtfillchar = k++; + } else if (fillchar == 'R') { + nxtfillchar = rand() & 0xff; } } @@ -191,14 +212,21 @@ main(int argc, char **argv) if (verbose) { (void) printf("%s: block_size = %d, write_count = %d, " - "offset = %lld, data = %s%d\n", filename, block_size, - write_count, offset, - (fillchar == 0) ? "0->" : "", - (fillchar == 0) ? DATA_RANGE : fillchar); + "offset = %lld, ", filename, block_size, + write_count, offset); + if (fillchar == 'R') { + (void) printf("data = [random]\n"); + } else { + (void) printf("data = %s%d\n", + (fillchar == 0) ? "0->" : "", + (fillchar == 0) ? DATA_RANGE : fillchar); + } } for (i = 0; i < write_count; i++) { ssize_t n; + if (fillchar == 'R') + randomize_buffer(block_size); if ((n = write(bigfd, &bigbuffer, block_size)) == -1) { (void) printf("write failed (%ld), good_writes = %" @@ -224,9 +252,11 @@ usage(char *prog) { (void) printf("Usage: %s [-v] -o {create,overwrite,append} -f file_name" " [-b block_size]\n" - "\t[-s offset] [-c write_count] [-d data]\n" - "\twhere [data] equal to zero causes chars " - "0->%d to be repeated throughout\n", prog, DATA_RANGE); + "\t[-s offset] [-c write_count] [-d data]\n\n" + "Where [data] equal to zero causes chars " + "0->%d to be repeated throughout, or [data]\n" + "equal to 'R' for psudorandom data.\n", + prog, DATA_RANGE); exit(1); } diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 37f173e12..62ba3a9eb 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -147,12 +147,14 @@ function default_setup_noexit typeset disklist=$1 typeset container=$2 typeset volume=$3 + log_note begin default_setup_noexit if is_global_zone; then if poolexists $TESTPOOL ; then destroy_pool $TESTPOOL fi 
[[ -d /$TESTPOOL ]] && $RM -rf /$TESTPOOL + log_note creating pool $TESTPOOL $disklist log_must $ZPOOL create -f $TESTPOOL $disklist else reexport_pool @@ -1539,6 +1541,18 @@ function get_disklist # pool $ECHO $disklist } +# +# Given a pool, this function lists all disks in the pool with their full +# path (like "/dev/sda" instead of "sda"). +# +function get_disklist_fullpath # pool +{ + args="-P $1" + get_disklist $args +} + + + # /** # This function kills a given list of processes after a time period. We use # this in the stress tests instead of STF_TIMEOUT so that we can have processes diff --git a/tests/zfs-tests/include/properties.shlib b/tests/zfs-tests/include/properties.shlib index bb0b4ff58..c495eecb4 100644 --- a/tests/zfs-tests/include/properties.shlib +++ b/tests/zfs-tests/include/properties.shlib @@ -16,7 +16,8 @@ typeset -a compress_props=('on' 'off' 'lzjb' 'gzip' 'gzip-1' 'gzip-2' 'gzip-3' 'gzip-4' 'gzip-5' 'gzip-6' 'gzip-7' 'gzip-8' 'gzip-9' 'zle') -typeset -a checksum_props=('on' 'off' 'fletcher2' 'fletcher4' 'sha256') +typeset -a checksum_props=('on' 'off' 'fletcher2' 'fletcher4' 'sha256' 'sha512' + 'edonr' 'skein' 'noparity') # # Given the property array passed in, return 'num_props' elements to the diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am index 79d33a14b..ed01eafb4 100644 --- a/tests/zfs-tests/tests/functional/Makefile.am +++ b/tests/zfs-tests/tests/functional/Makefile.am @@ -5,6 +5,7 @@ SUBDIRS = \ cache \ cachefile \ casenorm \ + checksum \ clean_mirror \ cli_root \ cli_user \ diff --git a/tests/zfs-tests/tests/functional/checksum/.gitignore b/tests/zfs-tests/tests/functional/checksum/.gitignore new file mode 100644 index 000000000..0411d5aa4 --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/.gitignore @@ -0,0 +1,4 @@ +skein_test +edonr_test +sha2_test + diff --git a/tests/zfs-tests/tests/functional/checksum/Makefile.am
b/tests/zfs-tests/tests/functional/checksum/Makefile.am new file mode 100644 index 000000000..2d7d271a0 --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/Makefile.am @@ -0,0 +1,26 @@ +include $(top_srcdir)/config/Rules.am +AM_CPPFLAGS += -I$(top_srcdir)/include +LDADD = $(top_srcdir)/lib/libicp/libicp.la + +AUTOMAKE_OPTIONS = subdir-objects + +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/checksum + +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + run_edonr_test.ksh \ + run_sha2_test.ksh \ + run_skein_test.ksh \ + filetest_001_pos.ksh + +pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/checksum + +pkgexec_PROGRAMS = \ + edonr_test \ + skein_test \ + sha2_test + +edonr_test_SOURCES = edonr_test.c +skein_test_SOURCES = skein_test.c +sha2_test_SOURCES = sha2_test.c diff --git a/tests/zfs-tests/tests/functional/checksum/cleanup.ksh b/tests/zfs-tests/tests/functional/checksum/cleanup.ksh new file mode 100755 index 000000000..79cd6e9f9 --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
+# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/checksum/edonr_test.c b/tests/zfs-tests/tests/functional/checksum/edonr_test.c new file mode 100644 index 000000000..1ea8e991e --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/edonr_test.c @@ -0,0 +1,219 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2013 Saso Kiselkov. All rights reserved. + */ + +/* + * This is just to keep the compiler happy about sys/time.h not declaring + * gettimeofday due to -D_KERNEL (we can do this since we're actually + * running in userspace, but we need -D_KERNEL for the remaining Edon-R code). 
+ */ +#ifdef _KERNEL +#undef _KERNEL +#endif + +#include +#include +#include +#include +#include +#define NOTE(x) +typedef enum boolean { B_FALSE, B_TRUE } boolean_t; +typedef unsigned long long u_longlong_t; + +/* + * Test messages from: + * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA_All.pdf + */ +const char *test_msg0 = "abc"; +const char *test_msg1 = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmn" + "lmnomnopnopq"; +const char *test_msg2 = "abcdefghbcdefghicdefghijdefghijkefghijklfghi" + "jklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"; + +/* + * Test digests computed by hand. There's no formal standard or spec for edonr. + */ +const uint8_t edonr_224_test_digests[][28] = { + { + /* for test_msg0 */ + 0x56, 0x63, 0xc4, 0x93, 0x95, 0x20, 0xfa, 0xf6, + 0x12, 0x31, 0x65, 0xa4, 0x66, 0xf2, 0x56, 0x01, + 0x95, 0x2e, 0xa9, 0xe4, 0x24, 0xdd, 0xc9, 0x6b, + 0xef, 0xd0, 0x40, 0x94 + }, + { + /* for test_msg1 */ + 0xd0, 0x13, 0xe4, 0x87, 0x4d, 0x06, 0x8d, 0xca, + 0x4e, 0x14, 0xb9, 0x37, 0x2f, 0xce, 0x12, 0x20, + 0x60, 0xf8, 0x5c, 0x0a, 0xfd, 0x7a, 0x7d, 0x97, + 0x88, 0x2b, 0x05, 0x75 + } + /* no test vector for test_msg2 */ +}; + +const uint8_t edonr_256_test_digests[][32] = { + { + /* for test_msg0 */ + 0x54, 0xd7, 0x8b, 0x13, 0xc7, 0x4e, 0xda, 0x5a, + 0xed, 0xc2, 0x71, 0xcc, 0x88, 0x1f, 0xb2, 0x2f, + 0x83, 0x99, 0xaf, 0xd3, 0x04, 0x0b, 0x6a, 0x39, + 0x2d, 0x73, 0x94, 0x05, 0x50, 0x8d, 0xd8, 0x51 + }, + { + /* for test_msg1 */ + 0x49, 0x2d, 0x0b, 0x19, 0xab, 0x1e, 0xde, 0x3a, + 0xea, 0x9b, 0xf2, 0x39, 0x3a, 0xb1, 0x21, 0xde, + 0x21, 0xf6, 0x80, 0x1f, 0xad, 0xbe, 0x8b, 0x07, + 0xc7, 0xfb, 0xe6, 0x99, 0x0e, 0x4d, 0x73, 0x63 + } + /* no test vectorfor test_msg2 */ +}; + +const uint8_t edonr_384_test_digests[][48] = { + { + /* for test_msg0 */ + 0x0e, 0x7c, 0xd7, 0x85, 0x78, 0x77, 0xe0, 0x89, + 0x5b, 0x1c, 0xdf, 0x49, 0xf4, 0x1d, 0x20, 0x9c, + 0x72, 0x7d, 0x2e, 0x57, 0x9b, 0x9b, 0x9a, 0xdc, + 0x60, 0x27, 0x97, 0x82, 0xb9, 0x90, 0x72, 
0xec, + 0x7e, 0xce, 0xd3, 0x16, 0x5f, 0x47, 0x75, 0x48, + 0xfa, 0x60, 0x72, 0x7e, 0x01, 0xc7, 0x7c, 0xc6 + }, + { + /* no test vector for test_msg1 */ + 0 + }, + { + /* for test_msg2 */ + 0xe2, 0x34, 0xa1, 0x02, 0x83, 0x76, 0xae, 0xe6, + 0x82, 0xd9, 0x38, 0x32, 0x0e, 0x00, 0x78, 0xd2, + 0x34, 0xdb, 0xb9, 0xbd, 0xf0, 0x08, 0xa8, 0x0f, + 0x63, 0x1c, 0x3d, 0x4a, 0xfd, 0x0a, 0xe9, 0x59, + 0xdc, 0xd4, 0xce, 0xcd, 0x8d, 0x67, 0x6c, 0xea, + 0xbb, 0x1a, 0x32, 0xed, 0x5c, 0x6b, 0xf1, 0x7f + } +}; + +const uint8_t edonr_512_test_digests[][64] = { + { + /* for test_msg0 */ + 0x1b, 0x14, 0xdb, 0x15, 0x5f, 0x1d, 0x40, 0x65, + 0x94, 0xb8, 0xce, 0xf7, 0x0a, 0x43, 0x62, 0xec, + 0x6b, 0x5d, 0xe6, 0xa5, 0xda, 0xf5, 0x0e, 0xc9, + 0x99, 0xe9, 0x87, 0xc1, 0x9d, 0x30, 0x49, 0xe2, + 0xde, 0x59, 0x77, 0xbb, 0x05, 0xb1, 0xbb, 0x22, + 0x00, 0x50, 0xa1, 0xea, 0x5b, 0x46, 0xa9, 0xf1, + 0x74, 0x0a, 0xca, 0xfb, 0xf6, 0xb4, 0x50, 0x32, + 0xad, 0xc9, 0x0c, 0x62, 0x83, 0x72, 0xc2, 0x2b + }, + { + /* no test vector for test_msg1 */ + 0 + }, + { + /* for test_msg2 */ + 0x53, 0x51, 0x07, 0x0d, 0xc5, 0x1c, 0x3b, 0x2b, + 0xac, 0xa5, 0xa6, 0x0d, 0x02, 0x52, 0xcc, 0xb4, + 0xe4, 0x92, 0x1a, 0x96, 0xfe, 0x5a, 0x69, 0xe7, + 0x6d, 0xad, 0x48, 0xfd, 0x21, 0xa0, 0x84, 0x5a, + 0xd5, 0x7f, 0x88, 0x0b, 0x3e, 0x4a, 0x90, 0x7b, + 0xc5, 0x03, 0x15, 0x18, 0x42, 0xbb, 0x94, 0x9e, + 0x1c, 0xba, 0x74, 0x39, 0xa6, 0x40, 0x9a, 0x34, + 0xb8, 0x43, 0x6c, 0xb4, 0x69, 0x21, 0x58, 0x3c + } +}; + +int +main(int argc, char *argv[]) +{ + boolean_t failed = B_FALSE; + uint64_t cpu_mhz = 0; + + if (argc == 2) + cpu_mhz = atoi(argv[1]); + +#define EDONR_ALGO_TEST(_m, mode, testdigest) \ + do { \ + EdonRState ctx; \ + uint8_t digest[mode / 8]; \ + EdonRInit(&ctx, mode); \ + EdonRUpdate(&ctx, (const uint8_t *) _m, strlen(_m) * 8);\ + EdonRFinal(&ctx, digest); \ + (void) printf("Edon-R-%-6sMessage: " #_m \ + "\tResult: ", #mode); \ + if (bcmp(digest, testdigest, mode / 8) == 0) { \ + (void) printf("OK\n"); \ + } else { \ + (void) 
printf("FAILED!\n"); \ + failed = B_TRUE; \ + } \ + NOTE(CONSTCOND) \ + } while (0) + +#define EDONR_PERF_TEST(mode) \ + do { \ + EdonRState ctx; \ + uint8_t digest[mode / 8]; \ + uint8_t block[131072]; \ + uint64_t delta; \ + double cpb = 0; \ + int i; \ + struct timeval start, end; \ + bzero(block, sizeof (block)); \ + (void) gettimeofday(&start, NULL); \ + EdonRInit(&ctx, mode); \ + for (i = 0; i < 8192; i++) \ + EdonRUpdate(&ctx, block, sizeof (block) * 8); \ + EdonRFinal(&ctx, digest); \ + (void) gettimeofday(&end, NULL); \ + delta = (end.tv_sec * 1000000llu + end.tv_usec) - \ + (start.tv_sec * 1000000llu + start.tv_usec); \ + if (cpu_mhz != 0) { \ + cpb = (cpu_mhz * 1e6 * ((double)delta / \ + 1000000)) / (8192 * 128 * 1024); \ + } \ + (void) printf("Edon-R-%-6s%llu us (%.02f CPB)\n", #mode,\ + (u_longlong_t)delta, cpb); \ + NOTE(CONSTCOND) \ + } while (0) + + (void) printf("Running algorithm correctness tests:\n"); + EDONR_ALGO_TEST(test_msg0, 224, edonr_224_test_digests[0]); + EDONR_ALGO_TEST(test_msg1, 224, edonr_224_test_digests[1]); + EDONR_ALGO_TEST(test_msg0, 256, edonr_256_test_digests[0]); + EDONR_ALGO_TEST(test_msg1, 256, edonr_256_test_digests[1]); + EDONR_ALGO_TEST(test_msg0, 384, edonr_384_test_digests[0]); + EDONR_ALGO_TEST(test_msg2, 384, edonr_384_test_digests[2]); + EDONR_ALGO_TEST(test_msg0, 512, edonr_512_test_digests[0]); + EDONR_ALGO_TEST(test_msg2, 512, edonr_512_test_digests[2]); + if (failed) + return (1); + + (void) printf("Running performance tests (hashing 1024 MiB of " + "data):\n"); + EDONR_PERF_TEST(256); + EDONR_PERF_TEST(512); + + return (0); +} diff --git a/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh b/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh new file mode 100755 index 000000000..758b353c9 --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh @@ -0,0 +1,125 @@ +#! 
/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/properties.shlib + +# DESCRIPTION: +# Sanity test to make sure checksum algorithms work. +# For each checksum, create a file in the pool using that checksum. Verify +# that there are no checksum errors. Next, for each checksum, create a single +# file in the pool using that checksum, scramble the underlying vdev, and +# verify that we correctly catch the checksum errors. +# +# STRATEGY: +# Test 1 +# 1. Create a mirrored pool +# 2. Create a file using each checksum +# 3. Export/import/scrub the pool +# 4. Verify there's no checksum errors. +# 5. Clear the pool +# +# Test 2 +# 6. For each checksum: +# 7. Create a file using the checksum +# 8. Export the pool +# 9. Scramble the data on one of the underlying VDEVs +# 10. Import the pool +# 11. Scrub the pool +# 12. 
Verify that there are checksum errors + +verify_runnable "both" + +function cleanup +{ + $ECHO cleanup + [[ -e $TESTDIR ]] && \ + log_must $RM -rf $TESTDIR/* > /dev/null 2>&1 +} + +log_assert "Create and read back files with using different checksum algorithms" + +log_onexit cleanup + +FSSIZE=$($ZPOOL list -Hp -o size $TESTPOOL) +WRITESZ=1048576 +WRITECNT=$((($FSSIZE) / $WRITESZ )) +# Skip the first and last 4MB +SKIP=4127518 +SKIPCNT=$((($SKIP / $WRITESZ ))) +SKIPCNT=$((($SKIPCNT * 2))) +WRITECNT=$((($WRITECNT - $SKIPCNT))) + +# Get a list of vdevs in our pool +set -A array $(get_disklist_fullpath) + +# Get the first vdev, since we will corrupt it later +firstvdev=${array[0]} + +# First test each checksum by writing a file using it, and confirm there's no +# errors. +for ((count = 0; count < ${#checksum_props[*]} ; count++)); do + i=${checksum_props[$count]} + $ZFS set checksum=$i $TESTPOOL + $FILE_WRITE -o overwrite -f $TESTDIR/test_$i -b $WRITESZ -c 5 -d R +done +$ZPOOL export $TESTPOOL +$ZPOOL import $TESTPOOL +$ZPOOL scrub $TESTPOOL +while is_pool_scrubbing $TESTPOOL; do + $SLEEP 1 +done +$ZPOOL status -P -v $TESTPOOL | grep $firstvdev | read -r name state rd wr cksum +log_assert "Normal file write test saw: $cksum errors" +log_must [ $cksum -eq 0 ] + +rm -fr $TESTDIR/* + +log_assert "Test scrambling the disk and seeing checksum errors" +for ((count = 0; count < ${#checksum_props[*]} ; count++)); do + i=${checksum_props[$count]} + $ZFS set checksum=$i $TESTPOOL + $FILE_WRITE -o overwrite -f $TESTDIR/test_$i -b $WRITESZ -c 5 -d R + + $ZPOOL export $TESTPOOL + + # Scramble the data on the first vdev in our pool. 
+ # Skip the first and last 4MB of data, then scramble the rest after that + # + $FILE_WRITE -o overwrite -f $firstvdev -s $SKIP -c $WRITECNT -b $WRITESZ -d R + + $ZPOOL import $TESTPOOL + + i=${checksum_props[$count]} + $ZPOOL scrub $TESTPOOL + while is_pool_scrubbing $TESTPOOL; do + $SLEEP 1 + done + + $ZPOOL status -P -v $TESTPOOL | grep $firstvdev | read -r name state rd wr cksum + + log_assert "Checksum '$i' caught $cksum checksum errors" + log_must [ $cksum -ne 0 ] + + rm -f $TESTDIR/test_$i + $ZPOOL clear $TESTPOOL +done diff --git a/tests/zfs-tests/tests/functional/checksum/run_edonr_test.ksh b/tests/zfs-tests/tests/functional/checksum/run_edonr_test.ksh new file mode 100755 index 000000000..7bcb321f2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/run_edonr_test.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# Description: +# Run the tests for the EdonR hash algorithm. +# + +log_assert "Run the tests for the EdonR hash algorithm." + +freq=$(get_cpu_freq) +log_must $STF_SUITE/tests/functional/checksum/edonr_test $freq + +log_pass "EdonR tests passed."
diff --git a/tests/zfs-tests/tests/functional/checksum/run_sha2_test.ksh b/tests/zfs-tests/tests/functional/checksum/run_sha2_test.ksh new file mode 100755 index 000000000..589e28a7f --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/run_sha2_test.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# Description: +# Run the tests for the SHA-2 hash algorithm. +# + +log_assert "Run the tests for the SHA-2 hash algorithm." + +freq=$(get_cpu_freq) +log_must $STF_SUITE/tests/functional/checksum/sha2_test $freq + +log_pass "SHA-2 tests passed." diff --git a/tests/zfs-tests/tests/functional/checksum/run_skein_test.ksh b/tests/zfs-tests/tests/functional/checksum/run_skein_test.ksh new file mode 100755 index 000000000..4290bfc79 --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/run_skein_test.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# Description: +# Run the tests for the Skein hash algorithm. 
+# + +log_assert "Run the tests for the Skein hash algorithm." + +freq=$(get_cpu_freq) +log_must $STF_SUITE/tests/functional/checksum/skein_test $freq + +log_pass "Skein tests passed." diff --git a/tests/zfs-tests/tests/functional/checksum/setup.ksh b/tests/zfs-tests/tests/functional/checksum/setup.ksh new file mode 100755 index 000000000..27e125df4 --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/setup.ksh @@ -0,0 +1,31 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} +default_mirror_setup $DISKS diff --git a/tests/zfs-tests/tests/functional/checksum/sha2_test.c b/tests/zfs-tests/tests/functional/checksum/sha2_test.c new file mode 100644 index 000000000..afd6f8243 --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/sha2_test.c @@ -0,0 +1,265 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2013 Saso Kiselkov. All rights reserved. + */ + +/* + * This is just to keep the compiler happy about sys/time.h not declaring + * gettimeofday due to -D_KERNEL (we can do this since we're actually + * running in userspace, but we need -D_KERNEL for the remaining SHA2 code). + */ +#ifdef _KERNEL +#undef _KERNEL +#endif + +#include +#include +#include +#include +#include +#define _SHA2_IMPL +#include +#define NOTE(x) +typedef enum boolean { B_FALSE, B_TRUE } boolean_t; +typedef unsigned long long u_longlong_t; + + +/* + * Test messages from: + * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA_All.pdf + */ + +const char *test_msg0 = "abc"; +const char *test_msg1 = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmn" + "lmnomnopnopq"; +const char *test_msg2 = "abcdefghbcdefghicdefghijdefghijkefghijklfghi" + "jklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"; + +/* + * Test digests from: + * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA_All.pdf + */ +const uint8_t sha256_test_digests[][32] = { + { + /* for test_msg0 */ + 0xBA, 0x78, 0x16, 0xBF, 0x8F, 0x01, 0xCF, 0xEA, + 0x41, 0x41, 0x40, 0xDE, 0x5D, 0xAE, 0x22, 0x23, + 0xB0, 0x03, 0x61, 0xA3, 0x96, 0x17, 0x7A, 0x9C, + 0xB4, 0x10, 0xFF, 0x61, 0xF2, 0x00, 0x15, 0xAD + }, + { + /* for test_msg1 */ + 0x24, 0x8D, 0x6A, 0x61, 0xD2, 0x06, 0x38, 0xB8, + 0xE5, 
0xC0, 0x26, 0x93, 0x0C, 0x3E, 0x60, 0x39, + 0xA3, 0x3C, 0xE4, 0x59, 0x64, 0xFF, 0x21, 0x67, + 0xF6, 0xEC, 0xED, 0xD4, 0x19, 0xDB, 0x06, 0xC1 + } + /* no test vector for test_msg2 */ +}; + +const uint8_t sha384_test_digests[][48] = { + { + /* for test_msg0 */ + 0xCB, 0x00, 0x75, 0x3F, 0x45, 0xA3, 0x5E, 0x8B, + 0xB5, 0xA0, 0x3D, 0x69, 0x9A, 0xC6, 0x50, 0x07, + 0x27, 0x2C, 0x32, 0xAB, 0x0E, 0xDE, 0xD1, 0x63, + 0x1A, 0x8B, 0x60, 0x5A, 0x43, 0xFF, 0x5B, 0xED, + 0x80, 0x86, 0x07, 0x2B, 0xA1, 0xE7, 0xCC, 0x23, + 0x58, 0xBA, 0xEC, 0xA1, 0x34, 0xC8, 0x25, 0xA7 + }, + { + /* no test vector for test_msg1 */ + 0 + }, + { + /* for test_msg2 */ + 0x09, 0x33, 0x0C, 0x33, 0xF7, 0x11, 0x47, 0xE8, + 0x3D, 0x19, 0x2F, 0xC7, 0x82, 0xCD, 0x1B, 0x47, + 0x53, 0x11, 0x1B, 0x17, 0x3B, 0x3B, 0x05, 0xD2, + 0x2F, 0xA0, 0x80, 0x86, 0xE3, 0xB0, 0xF7, 0x12, + 0xFC, 0xC7, 0xC7, 0x1A, 0x55, 0x7E, 0x2D, 0xB9, + 0x66, 0xC3, 0xE9, 0xFA, 0x91, 0x74, 0x60, 0x39 + } +}; + +const uint8_t sha512_test_digests[][64] = { + { + /* for test_msg0 */ + 0xDD, 0xAF, 0x35, 0xA1, 0x93, 0x61, 0x7A, 0xBA, + 0xCC, 0x41, 0x73, 0x49, 0xAE, 0x20, 0x41, 0x31, + 0x12, 0xE6, 0xFA, 0x4E, 0x89, 0xA9, 0x7E, 0xA2, + 0x0A, 0x9E, 0xEE, 0xE6, 0x4B, 0x55, 0xD3, 0x9A, + 0x21, 0x92, 0x99, 0x2A, 0x27, 0x4F, 0xC1, 0xA8, + 0x36, 0xBA, 0x3C, 0x23, 0xA3, 0xFE, 0xEB, 0xBD, + 0x45, 0x4D, 0x44, 0x23, 0x64, 0x3C, 0xE8, 0x0E, + 0x2A, 0x9A, 0xC9, 0x4F, 0xA5, 0x4C, 0xA4, 0x9F + }, + { + /* no test vector for test_msg1 */ + 0 + }, + { + /* for test_msg2 */ + 0x8E, 0x95, 0x9B, 0x75, 0xDA, 0xE3, 0x13, 0xDA, + 0x8C, 0xF4, 0xF7, 0x28, 0x14, 0xFC, 0x14, 0x3F, + 0x8F, 0x77, 0x79, 0xC6, 0xEB, 0x9F, 0x7F, 0xA1, + 0x72, 0x99, 0xAE, 0xAD, 0xB6, 0x88, 0x90, 0x18, + 0x50, 0x1D, 0x28, 0x9E, 0x49, 0x00, 0xF7, 0xE4, + 0x33, 0x1B, 0x99, 0xDE, 0xC4, 0xB5, 0x43, 0x3A, + 0xC7, 0xD3, 0x29, 0xEE, 0xB6, 0xDD, 0x26, 0x54, + 0x5E, 0x96, 0xE5, 0x5B, 0x87, 0x4B, 0xE9, 0x09 + } +}; + +const uint8_t sha512_224_test_digests[][28] = { + { + /* for test_msg0 */ + 0x46, 0x34, 
0x27, 0x0F, 0x70, 0x7B, 0x6A, 0x54, + 0xDA, 0xAE, 0x75, 0x30, 0x46, 0x08, 0x42, 0xE2, + 0x0E, 0x37, 0xED, 0x26, 0x5C, 0xEE, 0xE9, 0xA4, + 0x3E, 0x89, 0x24, 0xAA + }, + { + /* no test vector for test_msg1 */ + 0 + }, + { + /* for test_msg2 */ + 0x23, 0xFE, 0xC5, 0xBB, 0x94, 0xD6, 0x0B, 0x23, + 0x30, 0x81, 0x92, 0x64, 0x0B, 0x0C, 0x45, 0x33, + 0x35, 0xD6, 0x64, 0x73, 0x4F, 0xE4, 0x0E, 0x72, + 0x68, 0x67, 0x4A, 0xF9 + } +}; + +const uint8_t sha512_256_test_digests[][32] = { + { + /* for test_msg0 */ + 0x53, 0x04, 0x8E, 0x26, 0x81, 0x94, 0x1E, 0xF9, + 0x9B, 0x2E, 0x29, 0xB7, 0x6B, 0x4C, 0x7D, 0xAB, + 0xE4, 0xC2, 0xD0, 0xC6, 0x34, 0xFC, 0x6D, 0x46, + 0xE0, 0xE2, 0xF1, 0x31, 0x07, 0xE7, 0xAF, 0x23 + }, + { + /* no test vector for test_msg1 */ + 0 + }, + { + /* for test_msg2 */ + 0x39, 0x28, 0xE1, 0x84, 0xFB, 0x86, 0x90, 0xF8, + 0x40, 0xDA, 0x39, 0x88, 0x12, 0x1D, 0x31, 0xBE, + 0x65, 0xCB, 0x9D, 0x3E, 0xF8, 0x3E, 0xE6, 0x14, + 0x6F, 0xEA, 0xC8, 0x61, 0xE1, 0x9B, 0x56, 0x3A + } +}; + +/* + * Local reimplementation of cmn_err, since it's used in sha2.c. + */ +/*ARGSUSED*/ +void +cmn_err(int level, char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + /* LINTED: E_SEC_PRINTF_VAR_FMT */ + (void) vfprintf(stderr, format, ap); + va_end(ap); +} + +int +main(int argc, char *argv[]) +{ + boolean_t failed = B_FALSE; + uint64_t cpu_mhz = 0; + + if (argc == 2) + cpu_mhz = atoi(argv[1]); + +#define SHA2_ALGO_TEST(_m, mode, diglen, testdigest) \ + do { \ + SHA2_CTX ctx; \ + uint8_t digest[diglen / 8]; \ + SHA2Init(SHA ## mode ## _MECH_INFO_TYPE, &ctx); \ + SHA2Update(&ctx, _m, strlen(_m)); \ + SHA2Final(digest, &ctx); \ + (void) printf("SHA%-9sMessage: " #_m \ + "\tResult: ", #mode); \ + if (bcmp(digest, testdigest, diglen / 8) == 0) { \ + (void) printf("OK\n"); \ + } else { \ + (void) printf("FAILED!\n"); \ + failed = B_TRUE; \ + } \ + NOTE(CONSTCOND) \ + } while (0) + +#define SHA2_PERF_TEST(mode, diglen) \ + do { \ + SHA2_CTX ctx; \ + uint8_t digest[diglen / 8]; \ + uint8_t block[131072]; \ + uint64_t delta; \ + double cpb = 0; \ + int i; \ + struct timeval start, end; \ + bzero(block, sizeof (block)); \ + (void) gettimeofday(&start, NULL); \ + SHA2Init(SHA ## mode ## _MECH_INFO_TYPE, &ctx); \ + for (i = 0; i < 8192; i++) \ + SHA2Update(&ctx, block, sizeof (block)); \ + SHA2Final(digest, &ctx); \ + (void) gettimeofday(&end, NULL); \ + delta = (end.tv_sec * 1000000llu + end.tv_usec) - \ + (start.tv_sec * 1000000llu + start.tv_usec); \ + if (cpu_mhz != 0) { \ + cpb = (cpu_mhz * 1e6 * ((double)delta / \ + 1000000)) / (8192 * 128 * 1024); \ + } \ + (void) printf("SHA%-9s%llu us (%.02f CPB)\n", #mode, \ + (u_longlong_t)delta, cpb); \ + NOTE(CONSTCOND) \ + } while (0) + + (void) printf("Running algorithm correctness tests:\n"); + SHA2_ALGO_TEST(test_msg0, 256, 256, sha256_test_digests[0]); + SHA2_ALGO_TEST(test_msg1, 256, 256, sha256_test_digests[1]); + SHA2_ALGO_TEST(test_msg0, 384, 384, sha384_test_digests[0]); + SHA2_ALGO_TEST(test_msg2, 384, 384, sha384_test_digests[2]); + SHA2_ALGO_TEST(test_msg0, 512, 512, sha512_test_digests[0]); + SHA2_ALGO_TEST(test_msg2, 512, 512, 
sha512_test_digests[2]); + SHA2_ALGO_TEST(test_msg0, 512_224, 224, sha512_224_test_digests[0]); + SHA2_ALGO_TEST(test_msg2, 512_224, 224, sha512_224_test_digests[2]); + SHA2_ALGO_TEST(test_msg0, 512_256, 256, sha512_256_test_digests[0]); + SHA2_ALGO_TEST(test_msg2, 512_256, 256, sha512_256_test_digests[2]); + + if (failed) + return (1); + + (void) printf("Running performance tests (hashing 1024 MiB of " + "data):\n"); + SHA2_PERF_TEST(256, 256); + SHA2_PERF_TEST(512, 512); + + return (0); +} diff --git a/tests/zfs-tests/tests/functional/checksum/skein_test.c b/tests/zfs-tests/tests/functional/checksum/skein_test.c new file mode 100644 index 000000000..37548f03b --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/skein_test.c @@ -0,0 +1,342 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2013 Saso Kiselkov. All rights reserved. + */ + +/* + * This is just to keep the compiler happy about sys/time.h not declaring + * gettimeofday due to -D_KERNEL (we can do this since we're actually + * running in userspace, but we need -D_KERNEL for the remaining Skein code). 
+ */ +#ifdef _KERNEL +#undef _KERNEL +#endif + +#include +#include +#include +#include +#include +#define NOTE(x) + +typedef enum boolean { B_FALSE, B_TRUE } boolean_t; +typedef unsigned long long u_longlong_t; + +/* + * Skein test suite using values from the Skein V1.3 specification found at: + * http://www.skein-hash.info/sites/default/files/skein1.3.pdf + */ + +/* + * Test messages from the Skein spec, Appendix C. + */ +const uint8_t test_msg0[] = { + 0xFF +}; + +const uint8_t test_msg1[] = { + 0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA, 0xF9, 0xF8, + 0xF7, 0xF6, 0xF5, 0xF4, 0xF3, 0xF2, 0xF1, 0xF0, + 0xEF, 0xEE, 0xED, 0xEC, 0xEB, 0xEA, 0xE9, 0xE8, + 0xE7, 0xE6, 0xE5, 0xE4, 0xE3, 0xE2, 0xE1, 0xE0 +}; + +const uint8_t test_msg2[] = { + 0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA, 0xF9, 0xF8, + 0xF7, 0xF6, 0xF5, 0xF4, 0xF3, 0xF2, 0xF1, 0xF0, + 0xEF, 0xEE, 0xED, 0xEC, 0xEB, 0xEA, 0xE9, 0xE8, + 0xE7, 0xE6, 0xE5, 0xE4, 0xE3, 0xE2, 0xE1, 0xE0, + 0xDF, 0xDE, 0xDD, 0xDC, 0xDB, 0xDA, 0xD9, 0xD8, + 0xD7, 0xD6, 0xD5, 0xD4, 0xD3, 0xD2, 0xD1, 0xD0, + 0xCF, 0xCE, 0xCD, 0xCC, 0xCB, 0xCA, 0xC9, 0xC8, + 0xC7, 0xC6, 0xC5, 0xC4, 0xC3, 0xC2, 0xC1, 0xC0 +}; + +const uint8_t test_msg3[] = { + 0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA, 0xF9, 0xF8, + 0xF7, 0xF6, 0xF5, 0xF4, 0xF3, 0xF2, 0xF1, 0xF0, + 0xEF, 0xEE, 0xED, 0xEC, 0xEB, 0xEA, 0xE9, 0xE8, + 0xE7, 0xE6, 0xE5, 0xE4, 0xE3, 0xE2, 0xE1, 0xE0, + 0xDF, 0xDE, 0xDD, 0xDC, 0xDB, 0xDA, 0xD9, 0xD8, + 0xD7, 0xD6, 0xD5, 0xD4, 0xD3, 0xD2, 0xD1, 0xD0, + 0xCF, 0xCE, 0xCD, 0xCC, 0xCB, 0xCA, 0xC9, 0xC8, + 0xC7, 0xC6, 0xC5, 0xC4, 0xC3, 0xC2, 0xC1, 0xC0, + 0xBF, 0xBE, 0xBD, 0xBC, 0xBB, 0xBA, 0xB9, 0xB8, + 0xB7, 0xB6, 0xB5, 0xB4, 0xB3, 0xB2, 0xB1, 0xB0, + 0xAF, 0xAE, 0xAD, 0xAC, 0xAB, 0xAA, 0xA9, 0xA8, + 0xA7, 0xA6, 0xA5, 0xA4, 0xA3, 0xA2, 0xA1, 0xA0, + 0x9F, 0x9E, 0x9D, 0x9C, 0x9B, 0x9A, 0x99, 0x98, + 0x97, 0x96, 0x95, 0x94, 0x93, 0x92, 0x91, 0x90, + 0x8F, 0x8E, 0x8D, 0x8C, 0x8B, 0x8A, 0x89, 0x88, + 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80 +}; + +const uint8_t 
test_msg4[] = { + 0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA, 0xF9, 0xF8, + 0xF7, 0xF6, 0xF5, 0xF4, 0xF3, 0xF2, 0xF1, 0xF0, + 0xEF, 0xEE, 0xED, 0xEC, 0xEB, 0xEA, 0xE9, 0xE8, + 0xE7, 0xE6, 0xE5, 0xE4, 0xE3, 0xE2, 0xE1, 0xE0, + 0xDF, 0xDE, 0xDD, 0xDC, 0xDB, 0xDA, 0xD9, 0xD8, + 0xD7, 0xD6, 0xD5, 0xD4, 0xD3, 0xD2, 0xD1, 0xD0, + 0xCF, 0xCE, 0xCD, 0xCC, 0xCB, 0xCA, 0xC9, 0xC8, + 0xC7, 0xC6, 0xC5, 0xC4, 0xC3, 0xC2, 0xC1, 0xC0, + 0xBF, 0xBE, 0xBD, 0xBC, 0xBB, 0xBA, 0xB9, 0xB8, + 0xB7, 0xB6, 0xB5, 0xB4, 0xB3, 0xB2, 0xB1, 0xB0, + 0xAF, 0xAE, 0xAD, 0xAC, 0xAB, 0xAA, 0xA9, 0xA8, + 0xA7, 0xA6, 0xA5, 0xA4, 0xA3, 0xA2, 0xA1, 0xA0, + 0x9F, 0x9E, 0x9D, 0x9C, 0x9B, 0x9A, 0x99, 0x98, + 0x97, 0x96, 0x95, 0x94, 0x93, 0x92, 0x91, 0x90, + 0x8F, 0x8E, 0x8D, 0x8C, 0x8B, 0x8A, 0x89, 0x88, + 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, + 0x7F, 0x7E, 0x7D, 0x7C, 0x7B, 0x7A, 0x79, 0x78, + 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70, + 0x6F, 0x6E, 0x6D, 0x6C, 0x6B, 0x6A, 0x69, 0x68, + 0x67, 0x66, 0x65, 0x64, 0x63, 0x62, 0x61, 0x60, + 0x5F, 0x5E, 0x5D, 0x5C, 0x5B, 0x5A, 0x59, 0x58, + 0x57, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, + 0x4F, 0x4E, 0x4D, 0x4C, 0x4B, 0x4A, 0x49, 0x48, + 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41, 0x40, + 0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, + 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, + 0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, + 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, + 0x1F, 0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 +}; + +/* + * Test digests from the Skein spec, Appendix C. 
+ */ +const uint8_t skein_256_test_digests[][32] = { + { + /* for test_msg0 */ + 0x0B, 0x98, 0xDC, 0xD1, 0x98, 0xEA, 0x0E, 0x50, + 0xA7, 0xA2, 0x44, 0xC4, 0x44, 0xE2, 0x5C, 0x23, + 0xDA, 0x30, 0xC1, 0x0F, 0xC9, 0xA1, 0xF2, 0x70, + 0xA6, 0x63, 0x7F, 0x1F, 0x34, 0xE6, 0x7E, 0xD2 + }, + { + /* for test_msg1 */ + 0x8D, 0x0F, 0xA4, 0xEF, 0x77, 0x7F, 0xD7, 0x59, + 0xDF, 0xD4, 0x04, 0x4E, 0x6F, 0x6A, 0x5A, 0xC3, + 0xC7, 0x74, 0xAE, 0xC9, 0x43, 0xDC, 0xFC, 0x07, + 0x92, 0x7B, 0x72, 0x3B, 0x5D, 0xBF, 0x40, 0x8B + }, + { + /* for test_msg2 */ + 0xDF, 0x28, 0xE9, 0x16, 0x63, 0x0D, 0x0B, 0x44, + 0xC4, 0xA8, 0x49, 0xDC, 0x9A, 0x02, 0xF0, 0x7A, + 0x07, 0xCB, 0x30, 0xF7, 0x32, 0x31, 0x82, 0x56, + 0xB1, 0x5D, 0x86, 0x5A, 0xC4, 0xAE, 0x16, 0x2F + } + /* no test digests for test_msg3 and test_msg4 */ +}; + +const uint8_t skein_512_test_digests[][64] = { + { + /* for test_msg0 */ + 0x71, 0xB7, 0xBC, 0xE6, 0xFE, 0x64, 0x52, 0x22, + 0x7B, 0x9C, 0xED, 0x60, 0x14, 0x24, 0x9E, 0x5B, + 0xF9, 0xA9, 0x75, 0x4C, 0x3A, 0xD6, 0x18, 0xCC, + 0xC4, 0xE0, 0xAA, 0xE1, 0x6B, 0x31, 0x6C, 0xC8, + 0xCA, 0x69, 0x8D, 0x86, 0x43, 0x07, 0xED, 0x3E, + 0x80, 0xB6, 0xEF, 0x15, 0x70, 0x81, 0x2A, 0xC5, + 0x27, 0x2D, 0xC4, 0x09, 0xB5, 0xA0, 0x12, 0xDF, + 0x2A, 0x57, 0x91, 0x02, 0xF3, 0x40, 0x61, 0x7A + }, + { + /* no test vector for test_msg1 */ + 0, + }, + { + /* for test_msg2 */ + 0x45, 0x86, 0x3B, 0xA3, 0xBE, 0x0C, 0x4D, 0xFC, + 0x27, 0xE7, 0x5D, 0x35, 0x84, 0x96, 0xF4, 0xAC, + 0x9A, 0x73, 0x6A, 0x50, 0x5D, 0x93, 0x13, 0xB4, + 0x2B, 0x2F, 0x5E, 0xAD, 0xA7, 0x9F, 0xC1, 0x7F, + 0x63, 0x86, 0x1E, 0x94, 0x7A, 0xFB, 0x1D, 0x05, + 0x6A, 0xA1, 0x99, 0x57, 0x5A, 0xD3, 0xF8, 0xC9, + 0xA3, 0xCC, 0x17, 0x80, 0xB5, 0xE5, 0xFA, 0x4C, + 0xAE, 0x05, 0x0E, 0x98, 0x98, 0x76, 0x62, 0x5B + }, + { + /* for test_msg3 */ + 0x91, 0xCC, 0xA5, 0x10, 0xC2, 0x63, 0xC4, 0xDD, + 0xD0, 0x10, 0x53, 0x0A, 0x33, 0x07, 0x33, 0x09, + 0x62, 0x86, 0x31, 0xF3, 0x08, 0x74, 0x7E, 0x1B, + 0xCB, 0xAA, 0x90, 0xE4, 0x51, 0xCA, 0xB9, 0x2E, + 0x51, 0x88, 
0x08, 0x7A, 0xF4, 0x18, 0x87, 0x73, + 0xA3, 0x32, 0x30, 0x3E, 0x66, 0x67, 0xA7, 0xA2, + 0x10, 0x85, 0x6F, 0x74, 0x21, 0x39, 0x00, 0x00, + 0x71, 0xF4, 0x8E, 0x8B, 0xA2, 0xA5, 0xAD, 0xB7 + } + /* no test digests for test_msg4 */ +}; + +const uint8_t skein_1024_test_digests[][128] = { + { + /* for test_msg0 */ + 0xE6, 0x2C, 0x05, 0x80, 0x2E, 0xA0, 0x15, 0x24, + 0x07, 0xCD, 0xD8, 0x78, 0x7F, 0xDA, 0x9E, 0x35, + 0x70, 0x3D, 0xE8, 0x62, 0xA4, 0xFB, 0xC1, 0x19, + 0xCF, 0xF8, 0x59, 0x0A, 0xFE, 0x79, 0x25, 0x0B, + 0xCC, 0xC8, 0xB3, 0xFA, 0xF1, 0xBD, 0x24, 0x22, + 0xAB, 0x5C, 0x0D, 0x26, 0x3F, 0xB2, 0xF8, 0xAF, + 0xB3, 0xF7, 0x96, 0xF0, 0x48, 0x00, 0x03, 0x81, + 0x53, 0x1B, 0x6F, 0x00, 0xD8, 0x51, 0x61, 0xBC, + 0x0F, 0xFF, 0x4B, 0xEF, 0x24, 0x86, 0xB1, 0xEB, + 0xCD, 0x37, 0x73, 0xFA, 0xBF, 0x50, 0xAD, 0x4A, + 0xD5, 0x63, 0x9A, 0xF9, 0x04, 0x0E, 0x3F, 0x29, + 0xC6, 0xC9, 0x31, 0x30, 0x1B, 0xF7, 0x98, 0x32, + 0xE9, 0xDA, 0x09, 0x85, 0x7E, 0x83, 0x1E, 0x82, + 0xEF, 0x8B, 0x46, 0x91, 0xC2, 0x35, 0x65, 0x65, + 0x15, 0xD4, 0x37, 0xD2, 0xBD, 0xA3, 0x3B, 0xCE, + 0xC0, 0x01, 0xC6, 0x7F, 0xFD, 0xE1, 0x5B, 0xA8 + }, + { + /* no test vector for test_msg1 */ + 0 + }, + { + /* no test vector for test_msg2 */ + 0 + }, + { + /* for test_msg3 */ + 0x1F, 0x3E, 0x02, 0xC4, 0x6F, 0xB8, 0x0A, 0x3F, + 0xCD, 0x2D, 0xFB, 0xBC, 0x7C, 0x17, 0x38, 0x00, + 0xB4, 0x0C, 0x60, 0xC2, 0x35, 0x4A, 0xF5, 0x51, + 0x18, 0x9E, 0xBF, 0x43, 0x3C, 0x3D, 0x85, 0xF9, + 0xFF, 0x18, 0x03, 0xE6, 0xD9, 0x20, 0x49, 0x31, + 0x79, 0xED, 0x7A, 0xE7, 0xFC, 0xE6, 0x9C, 0x35, + 0x81, 0xA5, 0xA2, 0xF8, 0x2D, 0x3E, 0x0C, 0x7A, + 0x29, 0x55, 0x74, 0xD0, 0xCD, 0x7D, 0x21, 0x7C, + 0x48, 0x4D, 0x2F, 0x63, 0x13, 0xD5, 0x9A, 0x77, + 0x18, 0xEA, 0xD0, 0x7D, 0x07, 0x29, 0xC2, 0x48, + 0x51, 0xD7, 0xE7, 0xD2, 0x49, 0x1B, 0x90, 0x2D, + 0x48, 0x91, 0x94, 0xE6, 0xB7, 0xD3, 0x69, 0xDB, + 0x0A, 0xB7, 0xAA, 0x10, 0x6F, 0x0E, 0xE0, 0xA3, + 0x9A, 0x42, 0xEF, 0xC5, 0x4F, 0x18, 0xD9, 0x37, + 0x76, 0x08, 0x09, 0x85, 0xF9, 0x07, 0x57, 0x4F, + 0x99, 
0x5E, 0xC6, 0xA3, 0x71, 0x53, 0xA5, 0x78 + }, + { + /* for test_msg4 */ + 0x84, 0x2A, 0x53, 0xC9, 0x9C, 0x12, 0xB0, 0xCF, + 0x80, 0xCF, 0x69, 0x49, 0x1B, 0xE5, 0xE2, 0xF7, + 0x51, 0x5D, 0xE8, 0x73, 0x3B, 0x6E, 0xA9, 0x42, + 0x2D, 0xFD, 0x67, 0x66, 0x65, 0xB5, 0xFA, 0x42, + 0xFF, 0xB3, 0xA9, 0xC4, 0x8C, 0x21, 0x77, 0x77, + 0x95, 0x08, 0x48, 0xCE, 0xCD, 0xB4, 0x8F, 0x64, + 0x0F, 0x81, 0xFB, 0x92, 0xBE, 0xF6, 0xF8, 0x8F, + 0x7A, 0x85, 0xC1, 0xF7, 0xCD, 0x14, 0x46, 0xC9, + 0x16, 0x1C, 0x0A, 0xFE, 0x8F, 0x25, 0xAE, 0x44, + 0x4F, 0x40, 0xD3, 0x68, 0x00, 0x81, 0xC3, 0x5A, + 0xA4, 0x3F, 0x64, 0x0F, 0xD5, 0xFA, 0x3C, 0x3C, + 0x03, 0x0B, 0xCC, 0x06, 0xAB, 0xAC, 0x01, 0xD0, + 0x98, 0xBC, 0xC9, 0x84, 0xEB, 0xD8, 0x32, 0x27, + 0x12, 0x92, 0x1E, 0x00, 0xB1, 0xBA, 0x07, 0xD6, + 0xD0, 0x1F, 0x26, 0x90, 0x70, 0x50, 0x25, 0x5E, + 0xF2, 0xC8, 0xE2, 0x4F, 0x71, 0x6C, 0x52, 0xA5 + } +}; + +int +main(int argc, char *argv[]) +{ + boolean_t failed = B_FALSE; + uint64_t cpu_mhz = 0; + + if (argc == 2) + cpu_mhz = atoi(argv[1]); + +#define SKEIN_ALGO_TEST(_m, mode, diglen, testdigest) \ + do { \ + Skein ## mode ## _Ctxt_t ctx; \ + uint8_t digest[diglen / 8]; \ + (void) Skein ## mode ## _Init(&ctx, diglen); \ + (void) Skein ## mode ## _Update(&ctx, _m, sizeof (_m)); \ + (void) Skein ## mode ## _Final(&ctx, digest); \ + (void) printf("Skein" #mode "/" #diglen \ + "\tMessage: " #_m "\tResult: "); \ + if (bcmp(digest, testdigest, diglen / 8) == 0) { \ + (void) printf("OK\n"); \ + } else { \ + (void) printf("FAILED!\n"); \ + failed = B_TRUE; \ + } \ + NOTE(CONSTCOND) \ + } while (0) + +#define SKEIN_PERF_TEST(mode, diglen) \ + do { \ + Skein ## mode ## _Ctxt_t ctx; \ + uint8_t digest[diglen / 8]; \ + uint8_t block[131072]; \ + uint64_t delta; \ + double cpb = 0; \ + int i; \ + struct timeval start, end; \ + bzero(block, sizeof (block)); \ + (void) gettimeofday(&start, NULL); \ + (void) Skein ## mode ## _Init(&ctx, diglen); \ + for (i = 0; i < 8192; i++) { \ + (void) Skein ## mode ## 
_Update(&ctx, block, \ + sizeof (block)); \ + } \ + (void) Skein ## mode ## _Final(&ctx, digest); \ + (void) gettimeofday(&end, NULL); \ + delta = (end.tv_sec * 1000000llu + end.tv_usec) - \ + (start.tv_sec * 1000000llu + start.tv_usec); \ + if (cpu_mhz != 0) { \ + cpb = (cpu_mhz * 1e6 * ((double)delta / \ + 1000000)) / (8192 * 128 * 1024); \ + } \ + (void) printf("Skein" #mode "/" #diglen "\t%llu us " \ + "(%.02f CPB)\n", (u_longlong_t)delta, cpb); \ + NOTE(CONSTCOND) \ + } while (0) + + (void) printf("Running algorithm correctness tests:\n"); + SKEIN_ALGO_TEST(test_msg0, _256, 256, skein_256_test_digests[0]); + SKEIN_ALGO_TEST(test_msg1, _256, 256, skein_256_test_digests[1]); + SKEIN_ALGO_TEST(test_msg2, _256, 256, skein_256_test_digests[2]); + SKEIN_ALGO_TEST(test_msg0, _512, 512, skein_512_test_digests[0]); + SKEIN_ALGO_TEST(test_msg2, _512, 512, skein_512_test_digests[2]); + SKEIN_ALGO_TEST(test_msg3, _512, 512, skein_512_test_digests[3]); + SKEIN_ALGO_TEST(test_msg0, 1024, 1024, skein_1024_test_digests[0]); + SKEIN_ALGO_TEST(test_msg3, 1024, 1024, skein_1024_test_digests[3]); + SKEIN_ALGO_TEST(test_msg4, 1024, 1024, skein_1024_test_digests[4]); + if (failed) + return (1); + + (void) printf("Running performance tests (hashing 1024 MiB of " + "data):\n"); + SKEIN_PERF_TEST(_256, 256); + SKEIN_PERF_TEST(_512, 512); + SKEIN_PERF_TEST(1024, 1024); + + return (0); +} diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh index edc7a3fb9..27003b21b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh @@ -46,7 +46,7 @@ verify_runnable "both" set -A dataset "$TESTPOOL" "$TESTPOOL/$TESTFS" "$TESTPOOL/$TESTVOL" -set -A values "on" "off" "fletcher2" "fletcher4" "sha256" +set -A values "on" "off" "fletcher2" "fletcher4" "sha256" "sha512" "skein" "edonr" "noparity" 
log_assert "Setting a valid checksum on a file system, volume," \ "it should be successful." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index f7a1d9cb1..3807d0af6 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -37,7 +37,8 @@ typeset -a properties=("size" "capacity" "altroot" "health" "guid" "version" "feature@async_destroy" "feature@empty_bpobj" "feature@lz4_compress" "feature@large_blocks" "feature@large_dnode" "feature@filesystem_limits" "feature@spacemap_histogram" "feature@enabled_txg" "feature@hole_birth" - "feature@extensible_dataset" "feature@bookmarks" "feature@embedded_data") + "feature@extensible_dataset" "feature@bookmarks" "feature@embedded_data" + "feature@sha512" "feature@skein" "feature@edonr") else typeset -a properties=("size" "capacity" "altroot" "health" "guid" "version" "bootfs" ""leaked" delegation" "autoreplace" "cachefile" "dedupditto" "dedupratio" @@ -45,5 +46,6 @@ typeset -a properties=("size" "capacity" "altroot" "health" "guid" "version" "listsnapshots" "autoexpand" "feature@async_destroy" "feature@empty_bpobj" "feature@lz4_compress" "feature@multi_vdev_crash_dump" "feature@spacemap_histogram" "feature@enabled_txg" "feature@hole_birth" - "feature@extensible_dataset" "feature@bookmarks") + "feature@extensible_dataset" "feature@bookmarks" "feature@sha512" + "feature@skein" "feature@edonr") fi From 4a2e9a17d5b81ae97b2a1b72437def9ed4996aca Mon Sep 17 00:00:00 2001 From: ilovezfs Date: Mon, 25 Jan 2016 23:41:11 -0800 Subject: [PATCH 2/4] OpenZFS 6541 - Pool feature-flag check defeated if "verify" is included in the dedup property value Authored by: ilovezfs Reviewed by: Matthew Ahrens Reviewed by: Richard Laager Approved by: Robert Mustacchi Ported-by: Tony Hutter zio_checksum_to_feature() expects a zio_checksum enum not a raw property 
intval, so the new checksums weren't being detected when the ZIO_CHECKSUM_VERIFY flag got in the way. Given a pool without feature@sha512, zfs create -o dedup=sha512 naughty/fivetwelve_noverify_ds would fail as expected since the raw intval would indeed be equal to SPA_FEATURE_SHA512. However, zfs create -o dedup=sha512,verify naughty/fivetwelve_verify_ds would incorrectly succeed because ZIO_CHECKSUM_VERIFY would be in the way, the raw intval would not be a member of the enum, and zio_checksum_to_feature() would return SPA_FEATURE_NONE, with the result that spa_feature_is_enabled() would never be called. This was first detected with edonr, since in that case verify is required. This commit clears the ZIO_CHECKSUM_VERIFY flag before calling zio_checksum_to_feature() using the ZIO_CHECKSUM_MASK and verifies in zio_checksum_to_feature() that ZIO_CHECKSUM_MASK has been applied by the caller to attempt to prevent the same bug from occurring again in the future. OpenZFS-issue: https://www.illumos.org/issues/6541 OpenZFS-commit: https://github.com/illumos/illumos-gate/commit/971640e6aa954c91b0706543741aa4570299f4d7 Porting notes: This code was originally from Illumos, but I actually ported it from: openzfsonosx/zfs@bef06e1 --- module/zfs/zfs_ioctl.c | 2 +- module/zfs/zio_checksum.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 9140c62a6..e5704e258 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -3906,7 +3906,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) return (SET_ERROR(EINVAL)); /* check prop value is enabled in features */ - feature = zio_checksum_to_feature(intval); + feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK); if (feature == SPA_FEATURE_NONE) break; diff --git a/module/zfs/zio_checksum.c b/module/zfs/zio_checksum.c index 59871c50e..d3d2f05a8 100644 --- a/module/zfs/zio_checksum.c +++ b/module/zfs/zio_checksum.c @@ -135,9 
+135,15 @@ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { ZCHECKSUM_FLAG_NOPWRITE, "edonr"}, }; +/* + * The flag corresponding to the "verify" in dedup=[checksum,]verify + * must be cleared first, so callers should use ZIO_CHECKSUM_MASK. + */ spa_feature_t zio_checksum_to_feature(enum zio_checksum cksum) { + VERIFY((cksum & ~ZIO_CHECKSUM_MASK) == 0); + switch (cksum) { case ZIO_CHECKSUM_SHA512: return (SPA_FEATURE_SHA512); From 125a406e24fbfaec29557e88746210b9f18f646c Mon Sep 17 00:00:00 2001 From: ilovezfs Date: Thu, 28 Jan 2016 04:51:19 -0800 Subject: [PATCH 3/4] OpenZFS 6585 - sha512, skein, and edonr have an unenforced dependency on extensible dataset Authored by: ilovezfs Reviewed by: Matthew Ahrens Reviewed by: Richard Laager Approved by: Robert Mustacchi Ported by: Tony Hutter In any pool without the extensible dataset feature flag already enabled, creating a dataset with dedup set to use one of the new checksums would result in the following panic as soon as any data was added: panic[cpu0]/thread=ffffff0006761c40: feature_get_refcount(spa, feature, &refcount) != 48 (0x30 != 0x30), file: ../../common/fs/zfs/zfeature.c line 390 Inspection showed that feature->fi_feature was 7, which is the value of SPA_FEATURE_EXTENSIBLE_DATASET in the spa_feature enum. This commit adds extensible dataset as a dependency for the sha512, edonr, and skein feature flags, which prevents the panic. 
OpenZFS-issue: https://www.illumos.org/issues/6585 OpenZFS-commit: https://github.com/illumos/illumos-gate/commit/892586e8a147c02d7f4053cc405229a13e796928 Porting Notes: This code was originally from Illumos, but I actually ported it from: openzfsonosx/zfs@b62a652 --- man/man5/zpool-features.5 | 6 +++--- module/zfs/zfeature_common.c | 26 +++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/man/man5/zpool-features.5 b/man/man5/zpool-features.5 index dcfb30d18..ffefd4129 100644 --- a/man/man5/zpool-features.5 +++ b/man/man5/zpool-features.5 @@ -464,7 +464,7 @@ improving performance by avoiding the use of spill blocks. l l . GUID org.illumos:sha512 READ\-ONLY COMPATIBLE no -DEPENDENCIES none +DEPENDENCIES extensible_dataset .TE This feature enables the use of the SHA-512/256 truncated hash algorithm @@ -497,7 +497,7 @@ the updated GRUB stage2 module is installed). l l . GUID org.illumos:skein READ\-ONLY COMPATIBLE no -DEPENDENCIES none +DEPENDENCIES extensible_dataset .TE This feature enables the use of the Skein hash algorithm for checksum @@ -533,7 +533,7 @@ error. l l . 
GUID org.illumos:edonr READ\-ONLY COMPATIBLE no -DEPENDENCIES none +DEPENDENCIES extensible_dataset .TE This feature enables the use of the Edon-R hash algorithm for checksum, diff --git a/module/zfs/zfeature_common.c b/module/zfs/zfeature_common.c index e8b0a16ae..9beb4903e 100644 --- a/module/zfs/zfeature_common.c +++ b/module/zfs/zfeature_common.c @@ -253,16 +253,36 @@ zpool_feature_init(void) "Variable on-disk size of dnodes.", ZFEATURE_FLAG_PER_DATASET, large_dnode_deps); } + + { + static const spa_feature_t sha512_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; zfeature_register(SPA_FEATURE_SHA512, "org.illumos:sha512", "sha512", "SHA-512/256 hash algorithm.", - ZFEATURE_FLAG_PER_DATASET, NULL); + ZFEATURE_FLAG_PER_DATASET, sha512_deps); + } + { + static const spa_feature_t skein_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; zfeature_register(SPA_FEATURE_SKEIN, "org.illumos:skein", "skein", "Skein hash algorithm.", - ZFEATURE_FLAG_PER_DATASET, NULL); + ZFEATURE_FLAG_PER_DATASET, skein_deps); + } + + { + static const spa_feature_t edonr_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; zfeature_register(SPA_FEATURE_EDONR, "org.illumos:edonr", "edonr", "Edon-R hash algorithm.", - ZFEATURE_FLAG_PER_DATASET, NULL); + ZFEATURE_FLAG_PER_DATASET, edonr_deps); + } } From 7d75815dc950bdce3fd03cc40a3352d93c270e0f Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Mon, 15 Aug 2016 19:34:02 -0400 Subject: [PATCH 4/4] Use 100MB pool for filetest_001_pos.ksh checksum test As part of its tests, filetest_001_pos.ksh wipes the entire vdev to create checksum errors. This patch uses the setup/cleanup scripts from the scrub_mirror test to create a custom 100MB pool, rather than using the entire device size that is passed into zfs-tests.sh (which defaults to 2GB). 
This speeds up the buildbot tests, and also makes it possible for someone to use real disks (say, 1TB) without the test taking an insanely long amount of time. --- .../tests/functional/checksum/Makefile.am | 1 + .../tests/functional/checksum/cleanup.ksh | 24 ++++++- .../tests/functional/checksum/default.cfg | 66 +++++++++++++++++++ .../functional/checksum/filetest_001_pos.ksh | 2 +- .../tests/functional/checksum/setup.ksh | 28 ++++++-- 5 files changed, 114 insertions(+), 7 deletions(-) create mode 100644 tests/zfs-tests/tests/functional/checksum/default.cfg diff --git a/tests/zfs-tests/tests/functional/checksum/Makefile.am b/tests/zfs-tests/tests/functional/checksum/Makefile.am index 2d7d271a0..8132ea1d6 100644 --- a/tests/zfs-tests/tests/functional/checksum/Makefile.am +++ b/tests/zfs-tests/tests/functional/checksum/Makefile.am @@ -7,6 +7,7 @@ AUTOMAKE_OPTIONS = subdir-objects pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/checksum dist_pkgdata_SCRIPTS = \ + default.cfg \ setup.ksh \ cleanup.ksh \ run_edonr_test.ksh \ diff --git a/tests/zfs-tests/tests/functional/checksum/cleanup.ksh b/tests/zfs-tests/tests/functional/checksum/cleanup.ksh index 79cd6e9f9..da628bcd1 100755 --- a/tests/zfs-tests/tests/functional/checksum/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/checksum/cleanup.ksh @@ -25,6 +25,26 @@ # Use is subject to license terms. # -. $STF_SUITE/include/libtest.shlib +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# -default_cleanup +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/scrub_mirror/default.cfg + + +verify_runnable "global" + +$DF -F zfs -h | $GREP "$TESTFS " >/dev/null +[[ $? 
== 0 ]] && log_must $ZFS umount -f $TESTDIR +destroy_pool $TESTPOOL + +# recreate and destroy a zpool over the disks to restore the partitions to +# normal +if [[ -n $SINGLE_DISK ]]; then + log_must cleanup_devices $MIRROR_PRIMARY +else + log_must cleanup_devices $MIRROR_PRIMARY $MIRROR_SECONDARY +fi + +log_pass diff --git a/tests/zfs-tests/tests/functional/checksum/default.cfg b/tests/zfs-tests/tests/functional/checksum/default.cfg new file mode 100644 index 000000000..c173cc1c9 --- /dev/null +++ b/tests/zfs-tests/tests/functional/checksum/default.cfg @@ -0,0 +1,66 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. 
+# + +typeset -i NUMBER_OF_DISKS=0 +for i in $DISKS; do + [[ -n $MIRROR_PRIMARY ]] && MIRROR_SECONDARY=$i + [[ -z $MIRROR_PRIMARY ]] && MIRROR_PRIMARY=$i +done + +if [[ -z $MIRROR_SECONDARY ]]; then + # We need to repartition the single disk to two slices + SINGLE_DISK=$MIRROR_PRIMARY + MIRROR_SECONDARY=$MIRROR_PRIMARY + SIDE_PRIMARY_PART=0 + SIDE_SECONDARY_PART=1 + if is_linux; then + SIDE_PRIMARY=${SINGLE_DISK}p1 + SIDE_SECONDARY=${SINGLE_DISK}p2 + else + SIDE_PRIMARY=${SINGLE_DISK}s${SIDE_PRIMARY_PART} + SIDE_SECONDARY=${SINGLE_DISK}s${SIDE_SECONDARY_PART} + fi +else + SIDE_PRIMARY_PART=0 + SIDE_SECONDARY_PART=0 + if is_linux; then + SIDE_PRIMARY=${MIRROR_PRIMARY}p1 + SIDE_SECONDARY=${MIRROR_SECONDARY}p1 + else + SIDE_PRIMARY=${MIRROR_PRIMARY}s${SIDE_PRIMARY_PART} + SIDE_SECONDARY=${MIRROR_SECONDARY}s${SIDE_SECONDARY_PART} + fi +fi + + +export MIRROR_PRIMARY MIRROR_SECONDARY SINGLE_DISK SIDE_PRIMARY SIDE_SECONDARY + +export MIRROR_MEGS=100 +export MIRROR_SIZE=${MIRROR_MEGS}m # default mirror size diff --git a/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh b/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh index 758b353c9..a360dfd36 100755 --- a/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh @@ -89,7 +89,7 @@ while is_pool_scrubbing $TESTPOOL; do $SLEEP 1 done $ZPOOL status -P -v $TESTPOOL | grep $firstvdev | read -r name state rd wr cksum -log_assert "Normal file write test saw: $cksum errors" +log_assert "Normal file write test saw $cksum checksum errors" log_must [ $cksum -eq 0 ] rm -fr $TESTDIR/* diff --git a/tests/zfs-tests/tests/functional/checksum/setup.ksh b/tests/zfs-tests/tests/functional/checksum/setup.ksh index 27e125df4..dba4883f1 100755 --- a/tests/zfs-tests/tests/functional/checksum/setup.ksh +++ b/tests/zfs-tests/tests/functional/checksum/setup.ksh @@ -21,11 +21,31 @@ # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -. $STF_SUITE/include/libtest.shlib +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# -DISK=${DISKS%% *} -default_mirror_setup $DISKS +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/checksum/default.cfg + +verify_runnable "global" + +if ! $(is_physical_device $DISKS) ; then + log_unsupported "This directory cannot be run on raw files." +fi + +if [[ -n $SINGLE_DISK ]]; then + log_note "Partitioning a single disk ($SINGLE_DISK)" +else + log_note "Partitioning disks ($MIRROR_PRIMARY $MIRROR_SECONDARY)" +fi +log_must set_partition $SIDE_PRIMARY_PART "" $MIRROR_SIZE $MIRROR_PRIMARY +log_must set_partition $SIDE_SECONDARY_PART "" $MIRROR_SIZE $MIRROR_SECONDARY + +default_mirror_setup $SIDE_PRIMARY $SIDE_SECONDARY + +log_pass