mirror_zfs/tests/zfs-tests/cmd/checksum/edonr_test.c
Tino Reichardt 985c33b132
Introduce BLAKE3 checksums as an OpenZFS feature
This commit adds BLAKE3 checksums to OpenZFS. BLAKE3 has similar
performance to Edon-R, but without the caveats around the latter.

Homepage of BLAKE3: https://github.com/BLAKE3-team/BLAKE3
Wikipedia: https://en.wikipedia.org/wiki/BLAKE_(hash_function)#BLAKE3

Short description from Wikipedia:

  BLAKE3 is a cryptographic hash function based on Bao and BLAKE2,
  created by Jack O'Connor, Jean-Philippe Aumasson, Samuel Neves, and
  Zooko Wilcox-O'Hearn. It was announced on January 9, 2020, at Real
  World Crypto. BLAKE3 is a single algorithm with many desirable
  features (parallelism, XOF, KDF, PRF and MAC), in contrast to BLAKE
  and BLAKE2, which are algorithm families with multiple variants.
  BLAKE3 has a binary tree structure, so it supports a practically
  unlimited degree of parallelism (both SIMD and multithreading) given
  enough input. The official Rust and C implementations are
  dual-licensed as public domain (CC0) and the Apache License.

Along with adding the BLAKE3 hash into the OpenZFS infrastructure a
new benchmarking file called chksum_bench was introduced.  When read
it reports the speed of the available checksum functions.

On Linux: cat /proc/spl/kstat/zfs/chksum_bench
On FreeBSD: sysctl kstat.zfs.misc.chksum_bench

This is an example output of an i3-1005G1 test system with Debian 11:

implementation      1k      4k     16k     64k    256k      1m      4m
edonr-generic     1196    1602    1761    1749    1762    1759    1751
skein-generic      546     591     608     615     619     612     616
sha256-generic     240     300     316     314     304     285     276
sha512-generic     353     441     467     476     472     467     426
blake3-generic     308     313     313     313     312     313     312
blake3-sse2        402    1289    1423    1446    1432    1458    1413
blake3-sse41       427    1470    1625    1704    1679    1607    1629
blake3-avx2        428    1920    3095    3343    3356    3318    3204
blake3-avx512      473    2687    4905    5836    5844    5643    5374

Output on Debian 5.10.0-10-amd64 system: (Ryzen 7 5800X)

implementation      1k      4k     16k     64k    256k      1m      4m
edonr-generic     1840    2458    2665    2719    2711    2723    2693
skein-generic      870     966     996     992    1003    1005    1009
sha256-generic     415     442     453     455     457     457     457
sha512-generic     608     690     711     718     719     720     721
blake3-generic     301     313     311     309     309     310     310
blake3-sse2        343    1865    2124    2188    2180    2181    2186
blake3-sse41       364    2091    2396    2509    2463    2482    2488
blake3-avx2        365    2590    4399    4971    4915    4802    4764

Output on Debian 5.10.0-9-powerpc64le system: (POWER 9)

implementation      1k      4k     16k     64k    256k      1m      4m
edonr-generic     1213    1703    1889    1918    1957    1902    1907
skein-generic      434     492     520     522     511     525     525
sha256-generic     167     183     187     188     188     187     188
sha512-generic     186     216     222     221     225     224     224
blake3-generic     153     152     154     153     151     153     153
blake3-sse2        391    1170    1366    1406    1428    1426    1414
blake3-sse41       352    1049    1212    1174    1262    1258    1259

Output on Debian 5.10.0-11-arm64 system: (Pi400)

implementation      1k      4k     16k     64k    256k      1m      4m
edonr-generic      487     603     629     639     643     641     641
skein-generic      271     299     303     308     309     309     307
sha256-generic     117     127     128     130     130     129     130
sha512-generic     145     165     170     172     173     174     175
blake3-generic      81      29      71      89      89      89      89
blake3-sse2        112     323     368     379     380     371     374
blake3-sse41       101     315     357     368     369     364     360

Structurally, the new code is mainly split into these parts:
- 1x cross-platform generic C variant: blake3_generic.c
- 4x assembly for X86-64 (SSE2, SSE4.1, AVX2, AVX512)
- 2x assembly for ARMv8 (NEON converted from SSE2)
- 2x assembly for PPC64-LE (POWER8 converted from SSE2)
- one file for switching between the implementations

Note the PPC64 assembly requires the VSX instruction set and the
kfpu_begin() / kfpu_end() calls on PowerPC were updated accordingly.

Reviewed-by: Felix Dörre <felix@dogcraft.de>
Reviewed-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Co-authored-by: Rich Ercolani <rincebrain@gmail.com>
Closes #10058
Closes #12918
2022-06-08 15:55:57 -07:00

213 lines
6.5 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2013 Saso Kiselkov. All rights reserved.
*/
/*
* This is just to keep the compiler happy about sys/time.h not declaring
* gettimeofday due to -D_KERNEL (we can do this since we're actually
* running in userspace, but we need -D_KERNEL for the remaining Edon-R code).
*/
#include <sys/edonr.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <sys/time.h>
#include <sys/stdtypes.h>
/*
* Test messages from:
* http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA_All.pdf
*/
/* Shortest sample message: three bytes. */
const char *test_msg0 = "abc";

/* 56-byte sample message: fourteen overlapping 4-letter groups. */
const char *test_msg1 =
	"abcdbcdecdefdefgefghfghighij"
	"hijkijkljklmklmnlmnomnopnopq";

/* 112-byte sample message: fourteen overlapping 8-letter groups. */
const char *test_msg2 =
	"abcdefghbcdefghicdefghijdefghijk"
	"efghijklfghijklmghijklmnhijklmno"
	"ijklmnopjklmnopqklmnopqrlmnopqrs"
	"mnopqrstnopqrstu";
/*
* Test digests computed by hand. There's no formal standard or spec for edonr.
*/
/*
 * Expected Edon-R-224 digests (28 bytes per row).  Row N holds the digest
 * for test_msgN; there is no test vector for test_msg2, so the table has
 * only two rows.
 */
const uint8_t edonr_224_test_digests[][28] = {
	[0] = {	/* test_msg0 */
		0x56, 0x63, 0xc4, 0x93, 0x95, 0x20, 0xfa, 0xf6,
		0x12, 0x31, 0x65, 0xa4, 0x66, 0xf2, 0x56, 0x01,
		0x95, 0x2e, 0xa9, 0xe4, 0x24, 0xdd, 0xc9, 0x6b,
		0xef, 0xd0, 0x40, 0x94
	},
	[1] = {	/* test_msg1 */
		0xd0, 0x13, 0xe4, 0x87, 0x4d, 0x06, 0x8d, 0xca,
		0x4e, 0x14, 0xb9, 0x37, 0x2f, 0xce, 0x12, 0x20,
		0x60, 0xf8, 0x5c, 0x0a, 0xfd, 0x7a, 0x7d, 0x97,
		0x88, 0x2b, 0x05, 0x75
	}
};
/*
 * Expected Edon-R-256 digests (32 bytes per row).  Row N holds the digest
 * for test_msgN; there is no test vector for test_msg2, so the table has
 * only two rows.
 */
const uint8_t edonr_256_test_digests[][32] = {
	[0] = {	/* test_msg0 */
		0x54, 0xd7, 0x8b, 0x13, 0xc7, 0x4e, 0xda, 0x5a,
		0xed, 0xc2, 0x71, 0xcc, 0x88, 0x1f, 0xb2, 0x2f,
		0x83, 0x99, 0xaf, 0xd3, 0x04, 0x0b, 0x6a, 0x39,
		0x2d, 0x73, 0x94, 0x05, 0x50, 0x8d, 0xd8, 0x51
	},
	[1] = {	/* test_msg1 */
		0x49, 0x2d, 0x0b, 0x19, 0xab, 0x1e, 0xde, 0x3a,
		0xea, 0x9b, 0xf2, 0x39, 0x3a, 0xb1, 0x21, 0xde,
		0x21, 0xf6, 0x80, 0x1f, 0xad, 0xbe, 0x8b, 0x07,
		0xc7, 0xfb, 0xe6, 0x99, 0x0e, 0x4d, 0x73, 0x63
	}
};
/*
 * Expected Edon-R-384 digests (48 bytes per row).  Row N holds the digest
 * for test_msgN.  There is no test vector for test_msg1, so row 1 is an
 * all-zero placeholder that only keeps the row indices aligned with the
 * message numbers.
 */
const uint8_t edonr_384_test_digests[][48] = {
	[0] = {	/* test_msg0 */
		0x0e, 0x7c, 0xd7, 0x85, 0x78, 0x77, 0xe0, 0x89,
		0x5b, 0x1c, 0xdf, 0x49, 0xf4, 0x1d, 0x20, 0x9c,
		0x72, 0x7d, 0x2e, 0x57, 0x9b, 0x9b, 0x9a, 0xdc,
		0x60, 0x27, 0x97, 0x82, 0xb9, 0x90, 0x72, 0xec,
		0x7e, 0xce, 0xd3, 0x16, 0x5f, 0x47, 0x75, 0x48,
		0xfa, 0x60, 0x72, 0x7e, 0x01, 0xc7, 0x7c, 0xc6
	},
	[1] = {	/* placeholder: no test vector for test_msg1 */
		0
	},
	[2] = {	/* test_msg2 */
		0xe2, 0x34, 0xa1, 0x02, 0x83, 0x76, 0xae, 0xe6,
		0x82, 0xd9, 0x38, 0x32, 0x0e, 0x00, 0x78, 0xd2,
		0x34, 0xdb, 0xb9, 0xbd, 0xf0, 0x08, 0xa8, 0x0f,
		0x63, 0x1c, 0x3d, 0x4a, 0xfd, 0x0a, 0xe9, 0x59,
		0xdc, 0xd4, 0xce, 0xcd, 0x8d, 0x67, 0x6c, 0xea,
		0xbb, 0x1a, 0x32, 0xed, 0x5c, 0x6b, 0xf1, 0x7f
	}
};
/*
 * Expected Edon-R-512 digests (64 bytes per row).  Row N holds the digest
 * for test_msgN.  There is no test vector for test_msg1, so row 1 is an
 * all-zero placeholder that only keeps the row indices aligned with the
 * message numbers.
 */
const uint8_t edonr_512_test_digests[][64] = {
	[0] = {	/* test_msg0 */
		0x1b, 0x14, 0xdb, 0x15, 0x5f, 0x1d, 0x40, 0x65,
		0x94, 0xb8, 0xce, 0xf7, 0x0a, 0x43, 0x62, 0xec,
		0x6b, 0x5d, 0xe6, 0xa5, 0xda, 0xf5, 0x0e, 0xc9,
		0x99, 0xe9, 0x87, 0xc1, 0x9d, 0x30, 0x49, 0xe2,
		0xde, 0x59, 0x77, 0xbb, 0x05, 0xb1, 0xbb, 0x22,
		0x00, 0x50, 0xa1, 0xea, 0x5b, 0x46, 0xa9, 0xf1,
		0x74, 0x0a, 0xca, 0xfb, 0xf6, 0xb4, 0x50, 0x32,
		0xad, 0xc9, 0x0c, 0x62, 0x83, 0x72, 0xc2, 0x2b
	},
	[1] = {	/* placeholder: no test vector for test_msg1 */
		0
	},
	[2] = {	/* test_msg2 */
		0x53, 0x51, 0x07, 0x0d, 0xc5, 0x1c, 0x3b, 0x2b,
		0xac, 0xa5, 0xa6, 0x0d, 0x02, 0x52, 0xcc, 0xb4,
		0xe4, 0x92, 0x1a, 0x96, 0xfe, 0x5a, 0x69, 0xe7,
		0x6d, 0xad, 0x48, 0xfd, 0x21, 0xa0, 0x84, 0x5a,
		0xd5, 0x7f, 0x88, 0x0b, 0x3e, 0x4a, 0x90, 0x7b,
		0xc5, 0x03, 0x15, 0x18, 0x42, 0xbb, 0x94, 0x9e,
		0x1c, 0xba, 0x74, 0x39, 0xa6, 0x40, 0x9a, 0x34,
		0xb8, 0x43, 0x6c, 0xb4, 0x69, 0x21, 0x58, 0x3c
	}
};
int
main(int argc, char *argv[])
{
boolean_t failed = B_FALSE;
uint64_t cpu_mhz = 0;
if (argc == 2)
cpu_mhz = atoi(argv[1]);
#define EDONR_ALGO_TEST(_m, mode, testdigest) \
do { \
EdonRState ctx; \
uint8_t digest[mode / 8]; \
EdonRInit(&ctx, mode); \
EdonRUpdate(&ctx, (const uint8_t *) _m, strlen(_m) * 8);\
EdonRFinal(&ctx, digest); \
(void) printf("Edon-R-%-6sMessage: " #_m \
"\tResult: ", #mode); \
if (memcmp(digest, testdigest, mode / 8) == 0) { \
(void) printf("OK\n"); \
} else { \
(void) printf("FAILED!\n"); \
failed = B_TRUE; \
} \
} while (0)
#define EDONR_PERF_TEST(mode) \
do { \
EdonRState ctx; \
uint8_t digest[mode / 8]; \
uint8_t block[131072]; \
uint64_t delta; \
double cpb = 0; \
int i; \
struct timeval start, end; \
memset(block, 0, sizeof (block)); \
(void) gettimeofday(&start, NULL); \
EdonRInit(&ctx, mode); \
for (i = 0; i < 8192; i++) \
EdonRUpdate(&ctx, block, sizeof (block) * 8); \
EdonRFinal(&ctx, digest); \
(void) gettimeofday(&end, NULL); \
delta = (end.tv_sec * 1000000llu + end.tv_usec) - \
(start.tv_sec * 1000000llu + start.tv_usec); \
if (cpu_mhz != 0) { \
cpb = (cpu_mhz * 1e6 * ((double)delta / \
1000000)) / (8192 * 128 * 1024); \
} \
(void) printf("Edon-R-%-6s%llu us (%.02f CPB)\n", #mode,\
(u_longlong_t)delta, cpb); \
} while (0)
(void) printf("Running algorithm correctness tests:\n");
EDONR_ALGO_TEST(test_msg0, 224, edonr_224_test_digests[0]);
EDONR_ALGO_TEST(test_msg1, 224, edonr_224_test_digests[1]);
EDONR_ALGO_TEST(test_msg0, 256, edonr_256_test_digests[0]);
EDONR_ALGO_TEST(test_msg1, 256, edonr_256_test_digests[1]);
EDONR_ALGO_TEST(test_msg0, 384, edonr_384_test_digests[0]);
EDONR_ALGO_TEST(test_msg2, 384, edonr_384_test_digests[2]);
EDONR_ALGO_TEST(test_msg0, 512, edonr_512_test_digests[0]);
EDONR_ALGO_TEST(test_msg2, 512, edonr_512_test_digests[2]);
if (failed)
return (1);
(void) printf("Running performance tests (hashing 1024 MiB of "
"data):\n");
EDONR_PERF_TEST(256);
EDONR_PERF_TEST(512);
return (0);
}