mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-03-10 12:26:27 +03:00
icp: add SHA512 implementation using Intel SHA512 extensions
Generated from crypto/sha/asm/sha512-x86_64.pl in openssl/openssl@241d4826f8. Sponsored-by: TrueNAS Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Attila Fülöp <attila@fueloep.org> Signed-off-by: Rob Norris <rob.norris@truenas.com> Closes #18233
This commit is contained in:
parent
3547a358fd
commit
09c27a14a3
@ -22,6 +22,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
* Copyright (c) 2026, TrueNAS.
|
||||
*/
|
||||
|
||||
#include <sys/simd.h>
|
||||
@ -92,6 +93,20 @@ const sha512_ops_t sha512_avx2_impl = {
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_SHA512EXT)
|
||||
/*
 * Support probe for the "sha512ext" implementation: the result is true
 * only when both kfpu_allowed() and zfs_sha512ext_available() are true.
 * NOTE(review): presumably these report that FPU/SIMD state may be used
 * in the current context and that the CPU advertises the SHA512
 * extension; neither helper is defined in this view -- confirm in
 * <sys/simd.h>.
 */
static boolean_t sha2_have_sha512ext(void)
|
||||
{
|
||||
return (kfpu_allowed() && zfs_sha512ext_available());
|
||||
}
|
||||
|
||||
/*
 * TF() is given the assembly entry point and the name tf_sha512_sha512ext,
 * which is then used as the .transform callback below.
 * NOTE(review): the TF macro is defined outside this view; presumably it
 * declares the assembly routine and emits a C wrapper of that name --
 * confirm against the macro definition earlier in this file.
 */
TF(zfs_sha512_transform_sha512ext, tf_sha512_sha512ext);
|
||||
/*
 * Operations table for the SHA512-extension implementation: availability
 * probe, block transform and the implementation name "sha512ext".
 */
const sha512_ops_t sha512_sha512ext_impl = {
|
||||
.is_supported = sha2_have_sha512ext,
|
||||
.transform = tf_sha512_sha512ext,
|
||||
.name = "sha512ext"
|
||||
};
|
||||
#endif
|
||||
|
||||
#elif defined(__aarch64__) || defined(__arm__)
|
||||
extern void zfs_sha512_block_armv7(uint64_t s[8], const void *, size_t);
|
||||
const sha512_ops_t sha512_armv7_impl = {
|
||||
@ -164,6 +179,9 @@ static const sha512_ops_t *const sha512_impls[] = {
|
||||
#if defined(__x86_64) && defined(HAVE_AVX2)
|
||||
&sha512_avx2_impl,
|
||||
#endif
|
||||
#if defined(__x86_64) && defined(HAVE_SHA512EXT)
|
||||
&sha512_sha512ext_impl,
|
||||
#endif
|
||||
#if defined(__aarch64__) || defined(__arm__)
|
||||
&sha512_armv7_impl,
|
||||
#if defined(__aarch64__)
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
/*
|
||||
* Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
|
||||
* Copyright 2004-2025 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -114,6 +114,50 @@ K512:
|
||||
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
|
||||
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
|
||||
|
||||
/*
 * K512_single: the 80 SHA-512 round constants, each stored once
 * (80 * 8 = 640 bytes), aligned to 64 bytes.  The sha512ext transform
 * reads this table in 32-byte steps (four constants at a time) at
 * offsets 0 through 608.
 */
.balign 64
|
||||
SET_OBJ(K512_single)
|
||||
K512_single:
|
||||
.quad 0x428a2f98d728ae22, 0x7137449123ef65cd
|
||||
.quad 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
|
||||
.quad 0x3956c25bf348b538, 0x59f111f1b605d019
|
||||
.quad 0x923f82a4af194f9b, 0xab1c5ed5da6d8118
|
||||
.quad 0xd807aa98a3030242, 0x12835b0145706fbe
|
||||
.quad 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
|
||||
.quad 0x72be5d74f27b896f, 0x80deb1fe3b1696b1
|
||||
.quad 0x9bdc06a725c71235, 0xc19bf174cf692694
|
||||
.quad 0xe49b69c19ef14ad2, 0xefbe4786384f25e3
|
||||
.quad 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
|
||||
.quad 0x2de92c6f592b0275, 0x4a7484aa6ea6e483
|
||||
.quad 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
|
||||
.quad 0x983e5152ee66dfab, 0xa831c66d2db43210
|
||||
.quad 0xb00327c898fb213f, 0xbf597fc7beef0ee4
|
||||
.quad 0xc6e00bf33da88fc2, 0xd5a79147930aa725
|
||||
.quad 0x06ca6351e003826f, 0x142929670a0e6e70
|
||||
.quad 0x27b70a8546d22ffc, 0x2e1b21385c26c926
|
||||
.quad 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
|
||||
.quad 0x650a73548baf63de, 0x766a0abb3c77b2a8
|
||||
.quad 0x81c2c92e47edaee6, 0x92722c851482353b
|
||||
.quad 0xa2bfe8a14cf10364, 0xa81a664bbc423001
|
||||
.quad 0xc24b8b70d0f89791, 0xc76c51a30654be30
|
||||
.quad 0xd192e819d6ef5218, 0xd69906245565a910
|
||||
.quad 0xf40e35855771202a, 0x106aa07032bbd1b8
|
||||
.quad 0x19a4c116b8d2d0c8, 0x1e376c085141ab53
|
||||
.quad 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
|
||||
.quad 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
|
||||
.quad 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
|
||||
.quad 0x748f82ee5defb2fc, 0x78a5636f43172f60
|
||||
.quad 0x84c87814a1f0ab72, 0x8cc702081a6439ec
|
||||
.quad 0x90befffa23631e28, 0xa4506cebde82bde9
|
||||
.quad 0xbef9a3f7b2c67915, 0xc67178f2e372532b
|
||||
.quad 0xca273eceea26619c, 0xd186b8c721c0c207
|
||||
.quad 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
|
||||
.quad 0x06f067aa72176fba, 0x0a637dc5a2c898a6
|
||||
.quad 0x113f9804bef90dae, 0x1b710b35131c471b
|
||||
.quad 0x28db77f523047d84, 0x32caab7b40c72493
|
||||
.quad 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
|
||||
.quad 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
|
||||
.quad 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
|
||||
|
||||
ENTRY_ALIGN(zfs_sha512_transform_x64, 16)
|
||||
.cfi_startproc
|
||||
ENDBR
|
||||
@ -4010,6 +4054,278 @@ ENTRY_ALIGN(zfs_sha512_transform_avx2, 64)
|
||||
SET_SIZE(zfs_sha512_transform_avx2)
|
||||
STACK_FRAME_NON_STANDARD zfs_sha512_transform_avx2
|
||||
|
||||
#ifdef HAVE_SHA512EXT
|
||||
/*
 * zfs_sha512_transform_sha512ext: SHA-512 block transform using the
 * Intel SHA512 instructions.
 *
 * Register use as seen in this routine:
 *   %rdi  hash state; 64 bytes are read on entry and written back on exit
 *   %rsi  message data; 128 bytes are consumed per loop iteration
 *   %rdx  block count; zero returns immediately without touching the state
 *
 * The SHA512 instructions are emitted as raw .byte sequences.
 * NOTE(review): decoded by hand against the VEX encoding they appear to be
 *   c4 62 27 cb e0      vsha512rnds2 %xmm0,%ymm11,%ymm12
 *   c4 62 1f cb d8      vsha512rnds2 %xmm0,%ymm12,%ymm11
 *   c4 e2 7f cc modrm   vsha512msg1  %xmmN,%ymmM
 *   c4 e2 7f cd modrm   vsha512msg2  %ymmN,%ymmM
 * presumably so that assemblers without SHA512 support can still build
 * this file -- confirm against the Intel instruction set reference.
 */
ENTRY_ALIGN(zfs_sha512_transform_sha512ext, 64)
|
||||
.cfi_startproc
|
||||
ENDBR
|
||||
/* Nothing to do when the block count is zero. */
orq %rdx,%rdx
|
||||
je .Lsha512ext_done
|
||||
|
||||
/*
 * Broadcast the 16 bytes at K512+1280 into both lanes of %ymm15; it is the
 * vpshufb control applied to every message load in the loop.
 * NOTE(review): presumably the 64-bit byte-swap mask stored after the
 * round constants in K512 -- confirm the table layout.
 */
vbroadcasti128 1280+K512(%rip),%ymm15
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* Load the eight 64-bit state words (words 0-3, then words 4-7). */
vmovdqu 0(%rdi),%ymm0
|
||||
vmovdqu 32(%rdi),%ymm1
|
||||
|
||||
/* %ymm2 = words {0,1,4,5}, %ymm3 = words {2,3,6,7} (low to high). */
vperm2i128 $0x20,%ymm1,%ymm0,%ymm2
|
||||
vperm2i128 $0x31,%ymm1,%ymm0,%ymm3
|
||||
|
||||
/*
 * Reverse the qword order: %ymm13 = words {5,4,1,0}, %ymm14 = words
 * {7,6,3,2}.  These two registers carry the state across all blocks.
 */
vpermq $0x1b,%ymm2,%ymm13
|
||||
vpermq $0x1b,%ymm3,%ymm14
|
||||
|
||||
|
||||
/* %r9 -> round-constant table; it is not modified inside the loop. */
leaq K512_single(%rip),%r9
|
||||
|
||||
/*
 * Per-block loop, first part: load and byte-shuffle the 128-byte message
 * block into %ymm3..%ymm6 (32 bytes each) and run the round instructions
 * for K offsets 0..96.
 * NOTE(review): the .byte sequences are taken to be vsha512rnds2 (opcode
 * cb), vsha512msg1 (cc) and vsha512msg2 (cd) -- confirm against the Intel
 * instruction set reference.
 */
.balign 32
|
||||
.Lsha512ext_block_loop:
|
||||
|
||||
/*
 * Working copy of the state in %ymm11/%ymm12; %ymm13/%ymm14 keep the
 * values from the start of this block for the addition after the rounds.
 */
vmovdqa %ymm13,%ymm11
|
||||
vmovdqa %ymm14,%ymm12
|
||||
|
||||
|
||||
/*
 * Message bytes 0..31 -> %ymm3.  %ymm0 = schedule words + constants; the
 * first round instruction consumes its low 128 bits, vperm2i128 $0x1 swaps
 * the two lanes, and the second round instruction consumes the other half.
 */
vmovdqu 0(%rsi),%ymm0
|
||||
vpshufb %ymm15,%ymm0,%ymm3
|
||||
vpaddq 0(%r9),%ymm3,%ymm0
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
|
||||
|
||||
/*
 * Message bytes 32..63 -> %ymm4.  The trailing .byte (msg1, %xmm4 into
 * %ymm3) starts the schedule update of %ymm3.
 */
vmovdqu 32(%rsi),%ymm0
|
||||
vpshufb %ymm15,%ymm0,%ymm4
|
||||
vpaddq 32(%r9),%ymm4,%ymm0
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xdc
|
||||
|
||||
|
||||
/* Message bytes 64..95 -> %ymm5; msg1 (%xmm5 into %ymm4) follows. */
vmovdqu 64(%rsi),%ymm0
|
||||
vpshufb %ymm15,%ymm0,%ymm5
|
||||
vpaddq 64(%r9),%ymm5,%ymm0
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xe5
|
||||
|
||||
|
||||
/*
 * Message bytes 96..127 -> %ymm6.  The vpermq/vpermq/vpblendd sequence
 * builds a vector from the upper three qwords of %ymm5 and the lowest
 * qword of %ymm6, which is added into %ymm3 before msg2 (%ymm6 into
 * %ymm3) completes the next four schedule words in %ymm3.
 */
vmovdqu 96(%rsi),%ymm0
|
||||
vpshufb %ymm15,%ymm0,%ymm6
|
||||
vpaddq 96(%r9),%ymm6,%ymm0
|
||||
vpermq $0x1b,%ymm6,%ymm8
|
||||
vpermq $0x39,%ymm5,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm8
|
||||
vpaddq %ymm8,%ymm3,%ymm3
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xde
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xee
|
||||
|
||||
/*
 * Schedule/round steps for K offsets 128..224.  Each step adds four round
 * constants to the current schedule register, finishes the next schedule
 * register (vector add plus the cd .byte), issues two round instructions
 * with the lanes of %ymm0 swapped in between, and ends with a cc .byte
 * that begins a later schedule update.
 * NOTE(review): cb/cc/cd are taken to be vsha512rnds2/vsha512msg1/
 * vsha512msg2 -- confirm against the Intel instruction set reference.
 */
/* K offset 128: rounds use %ymm3; %ymm4 receives its next four words. */
vpaddq 128(%r9),%ymm3,%ymm0
|
||||
vpermq $0x1b,%ymm3,%ymm8
|
||||
vpermq $0x39,%ymm6,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm4,%ymm4
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xe3
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xf3
|
||||
|
||||
/* K offset 160: rounds use %ymm4; %ymm5 receives its next four words. */
vpaddq 160(%r9),%ymm4,%ymm0
|
||||
vpermq $0x1b,%ymm4,%ymm8
|
||||
vpermq $0x39,%ymm3,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm5,%ymm5
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xec
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xdc
|
||||
|
||||
/* K offset 192: rounds use %ymm5; %ymm6 receives its next four words. */
vpaddq 192(%r9),%ymm5,%ymm0
|
||||
vpermq $0x1b,%ymm5,%ymm8
|
||||
vpermq $0x39,%ymm4,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm6,%ymm6
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xf5
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xe5
|
||||
|
||||
/* K offset 224: rounds use %ymm6; %ymm3 receives its next four words. */
vpaddq 224(%r9),%ymm6,%ymm0
|
||||
vpermq $0x1b,%ymm6,%ymm8
|
||||
vpermq $0x39,%ymm5,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm3,%ymm3
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xde
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xee
|
||||
|
||||
/*
 * Schedule/round steps for K offsets 256..352; the schedule registers
 * rotate through %ymm3, %ymm4, %ymm5, %ymm6 exactly as in the steps for
 * offsets 128..224.
 * NOTE(review): the .byte opcodes cb/cc/cd are taken to be vsha512rnds2/
 * vsha512msg1/vsha512msg2 -- confirm against the Intel reference.
 */
/* K offset 256: rounds use %ymm3; %ymm4 receives its next four words. */
vpaddq 256(%r9),%ymm3,%ymm0
|
||||
vpermq $0x1b,%ymm3,%ymm8
|
||||
vpermq $0x39,%ymm6,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm4,%ymm4
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xe3
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xf3
|
||||
|
||||
/* K offset 288: rounds use %ymm4; %ymm5 receives its next four words. */
vpaddq 288(%r9),%ymm4,%ymm0
|
||||
vpermq $0x1b,%ymm4,%ymm8
|
||||
vpermq $0x39,%ymm3,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm5,%ymm5
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xec
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xdc
|
||||
|
||||
/* K offset 320: rounds use %ymm5; %ymm6 receives its next four words. */
vpaddq 320(%r9),%ymm5,%ymm0
|
||||
vpermq $0x1b,%ymm5,%ymm8
|
||||
vpermq $0x39,%ymm4,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm6,%ymm6
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xf5
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xe5
|
||||
|
||||
/* K offset 352: rounds use %ymm6; %ymm3 receives its next four words. */
vpaddq 352(%r9),%ymm6,%ymm0
|
||||
vpermq $0x1b,%ymm6,%ymm8
|
||||
vpermq $0x39,%ymm5,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm3,%ymm3
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xde
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xee
|
||||
|
||||
/*
 * Schedule/round steps for K offsets 384..480; same instruction pattern
 * and register rotation (%ymm3 -> %ymm4 -> %ymm5 -> %ymm6) as the steps
 * for offsets 128..352.
 * NOTE(review): the .byte opcodes cb/cc/cd are taken to be vsha512rnds2/
 * vsha512msg1/vsha512msg2 -- confirm against the Intel reference.
 */
/* K offset 384: rounds use %ymm3; %ymm4 receives its next four words. */
vpaddq 384(%r9),%ymm3,%ymm0
|
||||
vpermq $0x1b,%ymm3,%ymm8
|
||||
vpermq $0x39,%ymm6,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm4,%ymm4
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xe3
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xf3
|
||||
|
||||
/* K offset 416: rounds use %ymm4; %ymm5 receives its next four words. */
vpaddq 416(%r9),%ymm4,%ymm0
|
||||
vpermq $0x1b,%ymm4,%ymm8
|
||||
vpermq $0x39,%ymm3,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm5,%ymm5
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xec
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xdc
|
||||
|
||||
/* K offset 448: rounds use %ymm5; %ymm6 receives its next four words. */
vpaddq 448(%r9),%ymm5,%ymm0
|
||||
vpermq $0x1b,%ymm5,%ymm8
|
||||
vpermq $0x39,%ymm4,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm6,%ymm6
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xf5
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xe5
|
||||
|
||||
/* K offset 480: rounds use %ymm6; %ymm3 receives its next four words. */
vpaddq 480(%r9),%ymm6,%ymm0
|
||||
vpermq $0x1b,%ymm6,%ymm8
|
||||
vpermq $0x39,%ymm5,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm3,%ymm3
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xde
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xee
|
||||
|
||||
/*
 * Final steps, K offsets 512..608 (the end of the 640-byte constant
 * table).  The schedule work tapers off: the cc .byte is issued only in
 * the first step, the cd .byte in the first three, and the last step
 * consists of the constant add and the two round instructions alone.
 * NOTE(review): the .byte opcodes cb/cc/cd are taken to be vsha512rnds2/
 * vsha512msg1/vsha512msg2 -- confirm against the Intel reference.
 */
/* K offset 512: rounds use %ymm3; %ymm4 receives its next four words. */
vpaddq 512(%r9),%ymm3,%ymm0
|
||||
vpermq $0x1b,%ymm3,%ymm8
|
||||
vpermq $0x39,%ymm6,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm4,%ymm4
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xe3
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
.byte 0xc4,0xe2,0x7f,0xcc,0xf3
|
||||
|
||||
|
||||
/* K offset 544: rounds use %ymm4; %ymm5 is completed, no cc .byte. */
vpaddq 544(%r9),%ymm4,%ymm0
|
||||
vpermq $0x1b,%ymm4,%ymm8
|
||||
vpermq $0x39,%ymm3,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm5,%ymm5
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xec
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
|
||||
|
||||
/* K offset 576: rounds use %ymm5; %ymm6 is completed, no cc .byte. */
vpaddq 576(%r9),%ymm5,%ymm0
|
||||
vpermq $0x1b,%ymm5,%ymm8
|
||||
vpermq $0x39,%ymm4,%ymm9
|
||||
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
|
||||
vpaddq %ymm7,%ymm6,%ymm6
|
||||
.byte 0xc4,0xe2,0x7f,0xcd,0xf5
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
|
||||
|
||||
/* K offset 608: last constants; round instructions only. */
vpaddq 608(%r9),%ymm6,%ymm0
|
||||
.byte 0xc4,0x62,0x27,0xcb,0xe0
|
||||
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
|
||||
.byte 0xc4,0x62,0x1f,0xcb,0xd8
|
||||
|
||||
|
||||
/*
 * End of one block: add the working registers %ymm11/%ymm12 into the
 * carried state %ymm13/%ymm14, advance the message pointer by 128 bytes
 * and loop while the block count in %rdx is non-zero.
 */
vpaddq %ymm12,%ymm14,%ymm14
|
||||
vpaddq %ymm11,%ymm13,%ymm13
|
||||
addq $128,%rsi
|
||||
decq %rdx
|
||||
jnz .Lsha512ext_block_loop
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Undo the entry permutation: vperm2i128 regroups the 128-bit lanes and
 * vpermq $0xb1 swaps the qwords within each lane, so words 0-3 are stored
 * at 0(%rdi) and words 4-7 at 32(%rdi).
 */
vperm2i128 $0x31,%ymm14,%ymm13,%ymm1
|
||||
vperm2i128 $0x20,%ymm14,%ymm13,%ymm2
|
||||
vpermq $0xb1,%ymm1,%ymm1
|
||||
vpermq $0xb1,%ymm2,%ymm2
|
||||
vmovdqu %ymm1,0(%rdi)
|
||||
vmovdqu %ymm2,32(%rdi)
|
||||
|
||||
/* Clear the upper ymm halves before returning to the caller. */
vzeroupper
|
||||
/* The zero-block-count path jumps here; no vector register was touched. */
.Lsha512ext_done:
|
||||
RET
|
||||
.cfi_endproc
|
||||
SET_SIZE(zfs_sha512_transform_sha512ext)
|
||||
STACK_FRAME_NON_STANDARD zfs_sha512_transform_sha512ext
|
||||
#endif /* HAVE_SHA512EXT */
|
||||
|
||||
/* Workaround for missing asm macro in RHEL 8. */
|
||||
#if defined(__linux__) && defined(HAVE_STACK_FRAME_NON_STANDARD) && \
|
||||
! defined(HAVE_STACK_FRAME_NON_STANDARD_ASM)
|
||||
@ -4017,6 +4333,9 @@ STACK_FRAME_NON_STANDARD zfs_sha512_transform_avx2
|
||||
.long zfs_sha512_transform_x64 - .
|
||||
.long zfs_sha512_transform_avx - .
|
||||
.long zfs_sha512_transform_avx2 - .
|
||||
#ifdef HAVE_SHA512EXT
|
||||
.long zfs_sha512_transform_sha512ext - .
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__ELF__)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user