mirror_zfs/tests/zfs-tests/cmd/checksum/sha2_test.c
Tino Reichardt 985c33b132
Introduce BLAKE3 checksums as an OpenZFS feature
This commit adds BLAKE3 checksums to OpenZFS. BLAKE3 has similar
performance to Edon-R, but without the caveats around the latter.

Homepage of BLAKE3: https://github.com/BLAKE3-team/BLAKE3
Wikipedia: https://en.wikipedia.org/wiki/BLAKE_(hash_function)#BLAKE3

Short description from Wikipedia:

  BLAKE3 is a cryptographic hash function based on Bao and BLAKE2,
  created by Jack O'Connor, Jean-Philippe Aumasson, Samuel Neves, and
  Zooko Wilcox-O'Hearn. It was announced on January 9, 2020, at Real
  World Crypto. BLAKE3 is a single algorithm with many desirable
  features (parallelism, XOF, KDF, PRF and MAC), in contrast to BLAKE
  and BLAKE2, which are algorithm families with multiple variants.
  BLAKE3 has a binary tree structure, so it supports a practically
  unlimited degree of parallelism (both SIMD and multithreading) given
  enough input. The official Rust and C implementations are
  dual-licensed as public domain (CC0) and the Apache License.

Along with adding the BLAKE3 hash to the OpenZFS infrastructure, a
new benchmarking file called chksum_bench was introduced.  When read,
it reports the speed of the available checksum functions.

On Linux: cat /proc/spl/kstat/zfs/chksum_bench
On FreeBSD: sysctl kstat.zfs.misc.chksum_bench

This is an example output of an i3-1005G1 test system with Debian 11:

implementation      1k      4k     16k     64k    256k      1m      4m
edonr-generic     1196    1602    1761    1749    1762    1759    1751
skein-generic      546     591     608     615     619     612     616
sha256-generic     240     300     316     314     304     285     276
sha512-generic     353     441     467     476     472     467     426
blake3-generic     308     313     313     313     312     313     312
blake3-sse2        402    1289    1423    1446    1432    1458    1413
blake3-sse41       427    1470    1625    1704    1679    1607    1629
blake3-avx2        428    1920    3095    3343    3356    3318    3204
blake3-avx512      473    2687    4905    5836    5844    5643    5374

Output on Debian 5.10.0-10-amd64 system: (Ryzen 7 5800X)

implementation      1k      4k     16k     64k    256k      1m      4m
edonr-generic     1840    2458    2665    2719    2711    2723    2693
skein-generic      870     966     996     992    1003    1005    1009
sha256-generic     415     442     453     455     457     457     457
sha512-generic     608     690     711     718     719     720     721
blake3-generic     301     313     311     309     309     310     310
blake3-sse2        343    1865    2124    2188    2180    2181    2186
blake3-sse41       364    2091    2396    2509    2463    2482    2488
blake3-avx2        365    2590    4399    4971    4915    4802    4764

Output on Debian 5.10.0-9-powerpc64le system: (POWER 9)

implementation      1k      4k     16k     64k    256k      1m      4m
edonr-generic     1213    1703    1889    1918    1957    1902    1907
skein-generic      434     492     520     522     511     525     525
sha256-generic     167     183     187     188     188     187     188
sha512-generic     186     216     222     221     225     224     224
blake3-generic     153     152     154     153     151     153     153
blake3-sse2        391    1170    1366    1406    1428    1426    1414
blake3-sse41       352    1049    1212    1174    1262    1258    1259

Output on Debian 5.10.0-11-arm64 system: (Pi400)

implementation      1k      4k     16k     64k    256k      1m      4m
edonr-generic      487     603     629     639     643     641     641
skein-generic      271     299     303     308     309     309     307
sha256-generic     117     127     128     130     130     129     130
sha512-generic     145     165     170     172     173     174     175
blake3-generic      81      29      71      89      89      89      89
blake3-sse2        112     323     368     379     380     371     374
blake3-sse41       101     315     357     368     369     364     360

Structurally, the new code is mainly split into these parts:
- 1x cross platform generic c variant: blake3_generic.c
- 4x assembly for X86-64 (SSE2, SSE4.1, AVX2, AVX512)
- 2x assembly for ARMv8 (NEON converted from SSE2)
- 2x assembly for PPC64-LE (POWER8 converted from SSE2)
- one file for switching between the implementations

Note the PPC64 assembly requires the VSX instruction set and the
kfpu_begin() / kfpu_end() calls on PowerPC were updated accordingly.

Reviewed-by: Felix Dörre <felix@dogcraft.de>
Reviewed-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Co-authored-by: Rich Ercolani <rincebrain@gmail.com>
Closes #10058
Closes #12918
2022-06-08 15:55:57 -07:00

245 lines
7.3 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2013 Saso Kiselkov. All rights reserved.
*/
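/*
* NOTE(review): the text below describes a workaround for sys/time.h not
* declaring gettimeofday under -D_KERNEL, but no such workaround (for
* example an "#undef _KERNEL") follows this comment in the file as shown.
* The comment may be a leftover; confirm and either restore the workaround
* or drop the comment.
*
* This is just to keep the compiler happy about sys/time.h not declaring
* gettimeofday due to -D_KERNEL (we can do this since we're actually
* running in userspace, but we need -D_KERNEL for the remaining SHA2 code).
*/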
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <sys/time.h>
#define _SHA2_IMPL
#include <sys/sha2.h>
#include <sys/stdtypes.h>
/*
 * Input messages for the known-answer tests, taken from:
 * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA_All.pdf
 *
 * test_msg1 is built from overlapping 4-letter windows and test_msg2 from
 * overlapping 8-letter windows of the alphabet; the literals below are
 * split along those windows so the pattern is easy to check by eye.
 */
const char *test_msg0 = "abc";

const char *test_msg1 =
	"abcd" "bcde" "cdef" "defg"
	"efgh" "fghi" "ghij" "hijk"
	"ijkl" "jklm" "klmn" "lmno"
	"mnop" "nopq";

const char *test_msg2 =
	"abcdefgh" "bcdefghi" "cdefghij" "defghijk"
	"efghijkl" "fghijklm" "ghijklmn" "hijklmno"
	"ijklmnop" "jklmnopq" "klmnopqr" "lmnopqrs"
	"mnopqrst" "nopqrstu";
/*
 * Expected digests, taken from:
 * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA_All.pdf
 *
 * Row N of each table holds the digest of test_msgN.
 */

/* SHA-256: vectors exist for test_msg0 and test_msg1 only (no test_msg2). */
const uint8_t sha256_test_digests[][32] = {
	[0] = {	/* test_msg0 */
		0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea,
		0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23,
		0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c,
		0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad
	},
	[1] = {	/* test_msg1 */
		0x24, 0x8d, 0x6a, 0x61, 0xd2, 0x06, 0x38, 0xb8,
		0xe5, 0xc0, 0x26, 0x93, 0x0c, 0x3e, 0x60, 0x39,
		0xa3, 0x3c, 0xe4, 0x59, 0x64, 0xff, 0x21, 0x67,
		0xf6, 0xec, 0xed, 0xd4, 0x19, 0xdb, 0x06, 0xc1
	}
};
/*
 * SHA-384 expected digests, one row per test_msgN.
 * Row 1 is not listed: there is no test vector for test_msg1, so that row
 * is implicitly zero-filled and only keeps row 2 aligned with test_msg2.
 */
const uint8_t sha384_test_digests[][48] = {
	[0] = {	/* test_msg0 */
		0xcb, 0x00, 0x75, 0x3f, 0x45, 0xa3, 0x5e, 0x8b,
		0xb5, 0xa0, 0x3d, 0x69, 0x9a, 0xc6, 0x50, 0x07,
		0x27, 0x2c, 0x32, 0xab, 0x0e, 0xde, 0xd1, 0x63,
		0x1a, 0x8b, 0x60, 0x5a, 0x43, 0xff, 0x5b, 0xed,
		0x80, 0x86, 0x07, 0x2b, 0xa1, 0xe7, 0xcc, 0x23,
		0x58, 0xba, 0xec, 0xa1, 0x34, 0xc8, 0x25, 0xa7
	},
	[2] = {	/* test_msg2 */
		0x09, 0x33, 0x0c, 0x33, 0xf7, 0x11, 0x47, 0xe8,
		0x3d, 0x19, 0x2f, 0xc7, 0x82, 0xcd, 0x1b, 0x47,
		0x53, 0x11, 0x1b, 0x17, 0x3b, 0x3b, 0x05, 0xd2,
		0x2f, 0xa0, 0x80, 0x86, 0xe3, 0xb0, 0xf7, 0x12,
		0xfc, 0xc7, 0xc7, 0x1a, 0x55, 0x7e, 0x2d, 0xb9,
		0x66, 0xc3, 0xe9, 0xfa, 0x91, 0x74, 0x60, 0x39
	}
};
/*
 * SHA-512 expected digests, one row per test_msgN.
 * Row 1 is not listed: there is no test vector for test_msg1, so that row
 * is implicitly zero-filled and only keeps row 2 aligned with test_msg2.
 */
const uint8_t sha512_test_digests[][64] = {
	[0] = {	/* test_msg0 */
		0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba,
		0xcc, 0x41, 0x73, 0x49, 0xae, 0x20, 0x41, 0x31,
		0x12, 0xe6, 0xfa, 0x4e, 0x89, 0xa9, 0x7e, 0xa2,
		0x0a, 0x9e, 0xee, 0xe6, 0x4b, 0x55, 0xd3, 0x9a,
		0x21, 0x92, 0x99, 0x2a, 0x27, 0x4f, 0xc1, 0xa8,
		0x36, 0xba, 0x3c, 0x23, 0xa3, 0xfe, 0xeb, 0xbd,
		0x45, 0x4d, 0x44, 0x23, 0x64, 0x3c, 0xe8, 0x0e,
		0x2a, 0x9a, 0xc9, 0x4f, 0xa5, 0x4c, 0xa4, 0x9f
	},
	[2] = {	/* test_msg2 */
		0x8e, 0x95, 0x9b, 0x75, 0xda, 0xe3, 0x13, 0xda,
		0x8c, 0xf4, 0xf7, 0x28, 0x14, 0xfc, 0x14, 0x3f,
		0x8f, 0x77, 0x79, 0xc6, 0xeb, 0x9f, 0x7f, 0xa1,
		0x72, 0x99, 0xae, 0xad, 0xb6, 0x88, 0x90, 0x18,
		0x50, 0x1d, 0x28, 0x9e, 0x49, 0x00, 0xf7, 0xe4,
		0x33, 0x1b, 0x99, 0xde, 0xc4, 0xb5, 0x43, 0x3a,
		0xc7, 0xd3, 0x29, 0xee, 0xb6, 0xdd, 0x26, 0x54,
		0x5e, 0x96, 0xe5, 0x5b, 0x87, 0x4b, 0xe9, 0x09
	}
};
/*
 * SHA-512/224 expected digests, one row per test_msgN.
 * Row 1 is not listed: there is no test vector for test_msg1, so that row
 * is implicitly zero-filled and only keeps row 2 aligned with test_msg2.
 */
const uint8_t sha512_224_test_digests[][28] = {
	[0] = {	/* test_msg0 */
		0x46, 0x34, 0x27, 0x0f, 0x70, 0x7b, 0x6a, 0x54,
		0xda, 0xae, 0x75, 0x30, 0x46, 0x08, 0x42, 0xe2,
		0x0e, 0x37, 0xed, 0x26, 0x5c, 0xee, 0xe9, 0xa4,
		0x3e, 0x89, 0x24, 0xaa
	},
	[2] = {	/* test_msg2 */
		0x23, 0xfe, 0xc5, 0xbb, 0x94, 0xd6, 0x0b, 0x23,
		0x30, 0x81, 0x92, 0x64, 0x0b, 0x0c, 0x45, 0x33,
		0x35, 0xd6, 0x64, 0x73, 0x4f, 0xe4, 0x0e, 0x72,
		0x68, 0x67, 0x4a, 0xf9
	}
};
/*
 * SHA-512/256 expected digests, one row per test_msgN.
 * Row 1 is not listed: there is no test vector for test_msg1, so that row
 * is implicitly zero-filled and only keeps row 2 aligned with test_msg2.
 */
const uint8_t sha512_256_test_digests[][32] = {
	[0] = {	/* test_msg0 */
		0x53, 0x04, 0x8e, 0x26, 0x81, 0x94, 0x1e, 0xf9,
		0x9b, 0x2e, 0x29, 0xb7, 0x6b, 0x4c, 0x7d, 0xab,
		0xe4, 0xc2, 0xd0, 0xc6, 0x34, 0xfc, 0x6d, 0x46,
		0xe0, 0xe2, 0xf1, 0x31, 0x07, 0xe7, 0xaf, 0x23
	},
	[2] = {	/* test_msg2 */
		0x39, 0x28, 0xe1, 0x84, 0xfb, 0x86, 0x90, 0xf8,
		0x40, 0xda, 0x39, 0x88, 0x12, 0x1d, 0x31, 0xbe,
		0x65, 0xcb, 0x9d, 0x3e, 0xf8, 0x3e, 0xe6, 0x14,
		0x6f, 0xea, 0xc8, 0x61, 0xe1, 0x9b, 0x56, 0x3a
	}
};
int
main(int argc, char *argv[])
{
boolean_t failed = B_FALSE;
uint64_t cpu_mhz = 0;
if (argc == 2)
cpu_mhz = atoi(argv[1]);
#define SHA2_ALGO_TEST(_m, mode, diglen, testdigest) \
do { \
SHA2_CTX ctx; \
uint8_t digest[diglen / 8]; \
SHA2Init(SHA ## mode ## _MECH_INFO_TYPE, &ctx); \
SHA2Update(&ctx, _m, strlen(_m)); \
SHA2Final(digest, &ctx); \
(void) printf("SHA%-9sMessage: " #_m \
"\tResult: ", #mode); \
if (memcmp(digest, testdigest, diglen / 8) == 0) { \
(void) printf("OK\n"); \
} else { \
(void) printf("FAILED!\n"); \
failed = B_TRUE; \
} \
} while (0)
#define SHA2_PERF_TEST(mode, diglen) \
do { \
SHA2_CTX ctx; \
uint8_t digest[diglen / 8]; \
uint8_t block[131072]; \
uint64_t delta; \
double cpb = 0; \
int i; \
struct timeval start, end; \
memset(block, 0, sizeof (block)); \
(void) gettimeofday(&start, NULL); \
SHA2Init(SHA ## mode ## _MECH_INFO_TYPE, &ctx); \
for (i = 0; i < 8192; i++) \
SHA2Update(&ctx, block, sizeof (block)); \
SHA2Final(digest, &ctx); \
(void) gettimeofday(&end, NULL); \
delta = (end.tv_sec * 1000000llu + end.tv_usec) - \
(start.tv_sec * 1000000llu + start.tv_usec); \
if (cpu_mhz != 0) { \
cpb = (cpu_mhz * 1e6 * ((double)delta / \
1000000)) / (8192 * 128 * 1024); \
} \
(void) printf("SHA%-9s%llu us (%.02f CPB)\n", #mode, \
(u_longlong_t)delta, cpb); \
} while (0)
(void) printf("Running algorithm correctness tests:\n");
SHA2_ALGO_TEST(test_msg0, 256, 256, sha256_test_digests[0]);
SHA2_ALGO_TEST(test_msg1, 256, 256, sha256_test_digests[1]);
SHA2_ALGO_TEST(test_msg0, 384, 384, sha384_test_digests[0]);
SHA2_ALGO_TEST(test_msg2, 384, 384, sha384_test_digests[2]);
SHA2_ALGO_TEST(test_msg0, 512, 512, sha512_test_digests[0]);
SHA2_ALGO_TEST(test_msg2, 512, 512, sha512_test_digests[2]);
SHA2_ALGO_TEST(test_msg0, 512_224, 224, sha512_224_test_digests[0]);
SHA2_ALGO_TEST(test_msg2, 512_224, 224, sha512_224_test_digests[2]);
SHA2_ALGO_TEST(test_msg0, 512_256, 256, sha512_256_test_digests[0]);
SHA2_ALGO_TEST(test_msg2, 512_256, 256, sha512_256_test_digests[2]);
if (failed)
return (1);
(void) printf("Running performance tests (hashing 1024 MiB of "
"data):\n");
SHA2_PERF_TEST(256, 256);
SHA2_PERF_TEST(512, 512);
return (0);
}