mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-27 04:32:16 +03:00
Unify Assembler files between Linux and Windows
Add a new macro, ASMABI, used on Windows to change the calling convention to "sysv_abi".
Reviewed-by: Attila Fülöp <attila@fueloep.org>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Jorgen Lundman <lundman@lundman.net>
Closes #14228
This commit is contained in:
@@ -188,13 +188,13 @@
|
||||
#include <sys/types.h>
|
||||
void
|
||||
aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4],
|
||||
uint32_t ct[4]) {
|
||||
(void) rk, (void) Nr, (void) pt, (void) ct;
|
||||
uint32_t ct[4]) {
|
||||
(void) rk, (void) Nr, (void) pt, (void) ct;
|
||||
}
|
||||
void
|
||||
aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
|
||||
uint32_t pt[4]) {
|
||||
(void) rk, (void) Nr, (void) pt, (void) ct;
|
||||
uint32_t pt[4]) {
|
||||
(void) rk, (void) Nr, (void) pt, (void) ct;
|
||||
}
|
||||
|
||||
|
||||
@@ -221,23 +221,23 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
|
||||
|
||||
// finite field multiplies by {02}, {04} and {08}
|
||||
|
||||
#define f2(x) [[x<<1]^[[[x>>7]&1]*0x11b]]
|
||||
#define f4(x) [[x<<2]^[[[x>>6]&1]*0x11b]^[[[x>>6]&2]*0x11b]]
|
||||
#define f8(x) [[x<<3]^[[[x>>5]&1]*0x11b]^[[[x>>5]&2]*0x11b]^[[[x>>5]&4]*0x11b]]
|
||||
#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
|
||||
#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
|
||||
#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
|
||||
|
||||
// finite field multiplies required in table generation
|
||||
|
||||
#define f3(x) [[f2(x)] ^ [x]]
|
||||
#define f9(x) [[f8(x)] ^ [x]]
|
||||
#define fb(x) [[f8(x)] ^ [f2(x)] ^ [x]]
|
||||
#define fd(x) [[f8(x)] ^ [f4(x)] ^ [x]]
|
||||
#define fe(x) [[f8(x)] ^ [f4(x)] ^ [f2(x)]]
|
||||
#define f3(x) ((f2(x)) ^ (x))
|
||||
#define f9(x) ((f8(x)) ^ (x))
|
||||
#define fb(x) ((f8(x)) ^ (f2(x)) ^ (x))
|
||||
#define fd(x) ((f8(x)) ^ (f4(x)) ^ (x))
|
||||
#define fe(x) ((f8(x)) ^ (f4(x)) ^ (f2(x)))
|
||||
|
||||
// macros for expanding S-box data
|
||||
|
||||
#define u8(x) [f2(x)], [x], [x], [f3(x)], [f2(x)], [x], [x], [f3(x)]
|
||||
#define v8(x) [fe(x)], [f9(x)], [fd(x)], [fb(x)], [fe(x)], [f9(x)], [fd(x)], [x]
|
||||
#define w8(x) [x], 0, 0, 0, [x], 0, 0, 0
|
||||
#define u8(x) (f2(x)), (x), (x), (f3(x)), (f2(x)), (x), (x), (f3(x))
|
||||
#define v8(x) (fe(x)), (f9(x)), (fd(x)), (fb(x)), (fe(x)), (f9(x)), (fd(x)), (x)
|
||||
#define w8(x) (x), 0, 0, 0, (x), 0, 0, 0
|
||||
|
||||
#define enc_vals(x) \
|
||||
.byte x(0x63),x(0x7c),x(0x77),x(0x7b),x(0xf2),x(0x6b),x(0x6f),x(0xc5); \
|
||||
@@ -693,7 +693,7 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
|
||||
* int aes_encrypt(const unsigned char *in,
|
||||
* unsigned char *out, const aes_encrypt_ctx cx[1])/
|
||||
*/
|
||||
.section .rodata
|
||||
SECTION_STATIC
|
||||
.align 64
|
||||
enc_tab:
|
||||
enc_vals(u8)
|
||||
@@ -718,7 +718,7 @@ ENTRY_NP(aes_encrypt_amd64)
|
||||
|
||||
#else
|
||||
// OpenSolaris OS interface
|
||||
sub $[4*8], %rsp // Make room on stack to save registers
|
||||
sub $(4*8), %rsp // Make room on stack to save registers
|
||||
mov %rcx, (%rsp) // Save output pointer (P4) on stack
|
||||
mov %rdi, %r8 // context (P1)
|
||||
mov %rdx, %rdi // P3: save input pointer
|
||||
@@ -749,11 +749,11 @@ ENTRY_NP(aes_encrypt_amd64)
|
||||
|
||||
lea (kptr,%rsi), kptr
|
||||
// Jump based on byte key length * 16:
|
||||
cmp $[10*16], %esi
|
||||
cmp $(10*16), %esi
|
||||
je 3f
|
||||
cmp $[12*16], %esi
|
||||
cmp $(12*16), %esi
|
||||
je 2f
|
||||
cmp $[14*16], %esi
|
||||
cmp $(14*16), %esi
|
||||
je 1f
|
||||
mov $-1, %rax // error
|
||||
jmp 4f
|
||||
@@ -785,7 +785,7 @@ ENTRY_NP(aes_encrypt_amd64)
|
||||
mov 1*8(%rsp), %rbx
|
||||
mov 2*8(%rsp), %rbp
|
||||
mov 3*8(%rsp), %r12
|
||||
add $[4*8], %rsp
|
||||
add $(4*8), %rsp
|
||||
RET
|
||||
|
||||
SET_SIZE(aes_encrypt_amd64)
|
||||
@@ -799,7 +799,7 @@ ENTRY_NP(aes_encrypt_amd64)
|
||||
* int aes_decrypt(const unsigned char *in,
|
||||
* unsigned char *out, const aes_encrypt_ctx cx[1])/
|
||||
*/
|
||||
.section .rodata
|
||||
SECTION_STATIC
|
||||
.align 64
|
||||
dec_tab:
|
||||
dec_vals(v8)
|
||||
@@ -824,7 +824,7 @@ ENTRY_NP(aes_decrypt_amd64)
|
||||
|
||||
#else
|
||||
// OpenSolaris OS interface
|
||||
sub $[4*8], %rsp // Make room on stack to save registers
|
||||
sub $(4*8), %rsp // Make room on stack to save registers
|
||||
mov %rcx, (%rsp) // Save output pointer (P4) on stack
|
||||
mov %rdi, %r8 // context (P1)
|
||||
mov %rdx, %rdi // P3: save input pointer
|
||||
@@ -861,11 +861,11 @@ ENTRY_NP(aes_decrypt_amd64)
|
||||
xor rofs+12(%rdi), %edx
|
||||
|
||||
// Jump based on byte key length * 16:
|
||||
cmp $[10*16], %esi
|
||||
cmp $(10*16), %esi
|
||||
je 3f
|
||||
cmp $[12*16], %esi
|
||||
cmp $(12*16), %esi
|
||||
je 2f
|
||||
cmp $[14*16], %esi
|
||||
cmp $(14*16), %esi
|
||||
je 1f
|
||||
mov $-1, %rax // error
|
||||
jmp 4f
|
||||
@@ -897,11 +897,11 @@ ENTRY_NP(aes_decrypt_amd64)
|
||||
mov 1*8(%rsp), %rbx
|
||||
mov 2*8(%rsp), %rbp
|
||||
mov 3*8(%rsp), %r12
|
||||
add $[4*8], %rsp
|
||||
add $(4*8), %rsp
|
||||
RET
|
||||
|
||||
SET_SIZE(aes_decrypt_amd64)
|
||||
#endif /* lint || __lint */
|
||||
#endif /* lint || __lint */
|
||||
|
||||
#ifdef __ELF__
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
||||
@@ -31,12 +31,9 @@
|
||||
#include <sys/asm_linkage.h>
|
||||
|
||||
.intel_syntax noprefix
|
||||
.global zfs_blake3_hash_many_avx2
|
||||
.text
|
||||
|
||||
.type zfs_blake3_hash_many_avx2,@function
|
||||
.p2align 6
|
||||
zfs_blake3_hash_many_avx2:
|
||||
ENTRY_ALIGN(zfs_blake3_hash_many_avx2, 64)
|
||||
ENDBR
|
||||
push r15
|
||||
push r14
|
||||
@@ -1791,13 +1788,10 @@ zfs_blake3_hash_many_avx2:
|
||||
vmovdqu xmmword ptr [rbx+0x10], xmm1
|
||||
jmp 4b
|
||||
|
||||
.size zfs_blake3_hash_many_avx2, . - zfs_blake3_hash_many_avx2
|
||||
SET_SIZE(zfs_blake3_hash_many_avx2)
|
||||
|
||||
#ifdef __APPLE__
|
||||
.static_data
|
||||
#else
|
||||
SECTION_STATIC
|
||||
.section .rodata
|
||||
#endif
|
||||
|
||||
.p2align 6
|
||||
ADD0:
|
||||
|
||||
@@ -31,17 +31,9 @@
|
||||
#include <sys/asm_linkage.h>
|
||||
|
||||
.intel_syntax noprefix
|
||||
.global zfs_blake3_hash_many_avx512
|
||||
.global zfs_blake3_compress_in_place_avx512
|
||||
.global zfs_blake3_compress_xof_avx512
|
||||
.text
|
||||
|
||||
.type zfs_blake3_hash_many_avx512,@function
|
||||
.type zfs_blake3_compress_xof_avx512,@function
|
||||
.type zfs_blake3_compress_in_place_avx512,@function
|
||||
|
||||
.p2align 6
|
||||
zfs_blake3_hash_many_avx512:
|
||||
ENTRY_ALIGN(zfs_blake3_hash_many_avx512, 64)
|
||||
ENDBR
|
||||
push r15
|
||||
push r14
|
||||
@@ -2397,8 +2389,8 @@ zfs_blake3_hash_many_avx512:
|
||||
vmovdqu xmmword ptr [rbx], xmm0
|
||||
vmovdqu xmmword ptr [rbx+0x10], xmm1
|
||||
jmp 4b
|
||||
.p2align 6
|
||||
zfs_blake3_compress_in_place_avx512:
|
||||
|
||||
ENTRY_ALIGN(zfs_blake3_compress_in_place_avx512, 64)
|
||||
ENDBR
|
||||
vmovdqu xmm0, xmmword ptr [rdi]
|
||||
vmovdqu xmm1, xmmword ptr [rdi+0x10]
|
||||
@@ -2479,8 +2471,7 @@ zfs_blake3_compress_in_place_avx512:
|
||||
vmovdqu xmmword ptr [rdi+0x10], xmm1
|
||||
RET
|
||||
|
||||
.p2align 6
|
||||
zfs_blake3_compress_xof_avx512:
|
||||
ENTRY_ALIGN(zfs_blake3_compress_xof_avx512, 64)
|
||||
ENDBR
|
||||
vmovdqu xmm0, xmmword ptr [rdi]
|
||||
vmovdqu xmm1, xmmword ptr [rdi+0x10]
|
||||
@@ -2565,15 +2556,11 @@ zfs_blake3_compress_xof_avx512:
|
||||
vmovdqu xmmword ptr [r9+0x30], xmm3
|
||||
RET
|
||||
|
||||
.size zfs_blake3_hash_many_avx512, . - zfs_blake3_hash_many_avx512
|
||||
.size zfs_blake3_compress_in_place_avx512, . - zfs_blake3_compress_in_place_avx512
|
||||
.size zfs_blake3_compress_xof_avx512, . - zfs_blake3_compress_xof_avx512
|
||||
SET_SIZE(zfs_blake3_hash_many_avx512)
|
||||
SET_SIZE(zfs_blake3_compress_in_place_avx512)
|
||||
SET_SIZE(zfs_blake3_compress_xof_avx512)
|
||||
|
||||
#ifdef __APPLE__
|
||||
.static_data
|
||||
#else
|
||||
.section .rodata
|
||||
#endif
|
||||
SECTION_STATIC
|
||||
|
||||
.p2align 6
|
||||
INDEX0:
|
||||
|
||||
@@ -31,17 +31,10 @@
|
||||
#include <sys/asm_linkage.h>
|
||||
|
||||
.intel_syntax noprefix
|
||||
.global zfs_blake3_hash_many_sse2
|
||||
.global zfs_blake3_compress_in_place_sse2
|
||||
.global zfs_blake3_compress_xof_sse2
|
||||
|
||||
.text
|
||||
.type zfs_blake3_hash_many_sse2,@function
|
||||
.type zfs_blake3_compress_in_place_sse2,@function
|
||||
.type zfs_blake3_compress_xof_sse2,@function
|
||||
SECTION_TEXT
|
||||
|
||||
.p2align 6
|
||||
zfs_blake3_hash_many_sse2:
|
||||
ENTRY_ALIGN(zfs_blake3_hash_many_sse2, 64)
|
||||
ENDBR
|
||||
push r15
|
||||
push r14
|
||||
@@ -2038,8 +2031,7 @@ zfs_blake3_hash_many_sse2:
|
||||
movups xmmword ptr [rbx+0x10], xmm1
|
||||
jmp 4b
|
||||
|
||||
.p2align 6
|
||||
zfs_blake3_compress_in_place_sse2:
|
||||
ENTRY_ALIGN(zfs_blake3_compress_in_place_sse2, 64)
|
||||
ENDBR
|
||||
movups xmm0, xmmword ptr [rdi]
|
||||
movups xmm1, xmmword ptr [rdi+0x10]
|
||||
@@ -2149,8 +2141,7 @@ zfs_blake3_compress_in_place_sse2:
|
||||
movups xmmword ptr [rdi+0x10], xmm1
|
||||
RET
|
||||
|
||||
.p2align 6
|
||||
zfs_blake3_compress_xof_sse2:
|
||||
ENTRY_ALIGN(zfs_blake3_compress_xof_sse2, 64)
|
||||
ENDBR
|
||||
movups xmm0, xmmword ptr [rdi]
|
||||
movups xmm1, xmmword ptr [rdi+0x10]
|
||||
@@ -2268,20 +2259,16 @@ zfs_blake3_compress_xof_sse2:
|
||||
movups xmmword ptr [r9+0x30], xmm3
|
||||
RET
|
||||
|
||||
.size zfs_blake3_hash_many_sse2, . - zfs_blake3_hash_many_sse2
|
||||
.size zfs_blake3_compress_in_place_sse2, . - zfs_blake3_compress_in_place_sse2
|
||||
.size zfs_blake3_compress_xof_sse2, . - zfs_blake3_compress_xof_sse2
|
||||
SET_SIZE(zfs_blake3_hash_many_sse2)
|
||||
SET_SIZE(zfs_blake3_compress_in_place_sse2)
|
||||
SET_SIZE(zfs_blake3_compress_xof_sse2)
|
||||
|
||||
#ifdef __APPLE__
|
||||
.static_data
|
||||
#else
|
||||
.section .rodata
|
||||
#endif
|
||||
SECTION_STATIC
|
||||
.p2align 6
|
||||
BLAKE3_IV:
|
||||
.long 0x6A09E667, 0xBB67AE85
|
||||
.long 0x3C6EF372, 0xA54FF53A
|
||||
ADD0:
|
||||
ADD0:
|
||||
.long 0, 1, 2, 3
|
||||
ADD1:
|
||||
.long 4, 4, 4, 4
|
||||
|
||||
@@ -31,17 +31,10 @@
|
||||
#include <sys/asm_linkage.h>
|
||||
|
||||
.intel_syntax noprefix
|
||||
.global zfs_blake3_compress_in_place_sse41
|
||||
.global zfs_blake3_compress_xof_sse41
|
||||
.global zfs_blake3_hash_many_sse41
|
||||
|
||||
.text
|
||||
.type zfs_blake3_hash_many_sse41,@function
|
||||
.type zfs_blake3_compress_in_place_sse41,@function
|
||||
.type zfs_blake3_compress_xof_sse41,@function
|
||||
|
||||
.p2align 6
|
||||
zfs_blake3_hash_many_sse41:
|
||||
ENTRY_ALIGN(zfs_blake3_hash_many_sse41, 64)
|
||||
ENDBR
|
||||
push r15
|
||||
push r14
|
||||
@@ -1800,8 +1793,8 @@ zfs_blake3_hash_many_sse41:
|
||||
movups xmmword ptr [rbx], xmm0
|
||||
movups xmmword ptr [rbx+0x10], xmm1
|
||||
jmp 4b
|
||||
.p2align 6
|
||||
zfs_blake3_compress_in_place_sse41:
|
||||
|
||||
ENTRY_ALIGN(zfs_blake3_compress_in_place_sse41, 64)
|
||||
ENDBR
|
||||
movups xmm0, xmmword ptr [rdi]
|
||||
movups xmm1, xmmword ptr [rdi+0x10]
|
||||
@@ -1899,8 +1892,8 @@ zfs_blake3_compress_in_place_sse41:
|
||||
movups xmmword ptr [rdi], xmm0
|
||||
movups xmmword ptr [rdi+0x10], xmm1
|
||||
RET
|
||||
.p2align 6
|
||||
zfs_blake3_compress_xof_sse41:
|
||||
|
||||
ENTRY_ALIGN(zfs_blake3_compress_xof_sse41, 64)
|
||||
ENDBR
|
||||
movups xmm0, xmmword ptr [rdi]
|
||||
movups xmm1, xmmword ptr [rdi+0x10]
|
||||
@@ -2007,15 +2000,12 @@ zfs_blake3_compress_xof_sse41:
|
||||
movups xmmword ptr [r9+0x30], xmm3
|
||||
RET
|
||||
|
||||
.size zfs_blake3_hash_many_sse41, . - zfs_blake3_hash_many_sse41
|
||||
.size zfs_blake3_compress_in_place_sse41, . - zfs_blake3_compress_in_place_sse41
|
||||
.size zfs_blake3_compress_xof_sse41, . - zfs_blake3_compress_xof_sse41
|
||||
SET_SIZE(zfs_blake3_hash_many_sse41)
|
||||
SET_SIZE(zfs_blake3_compress_in_place_sse41)
|
||||
SET_SIZE(zfs_blake3_compress_xof_sse41)
|
||||
|
||||
SECTION_STATIC
|
||||
|
||||
#ifdef __APPLE__
|
||||
.static_data
|
||||
#else
|
||||
.section .rodata
|
||||
#endif
|
||||
.p2align 6
|
||||
BLAKE3_IV:
|
||||
.long 0x6A09E667, 0xBB67AE85
|
||||
@@ -2024,7 +2014,7 @@ ROT16:
|
||||
.byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
|
||||
ROT8:
|
||||
.byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
|
||||
ADD0:
|
||||
ADD0:
|
||||
.long 0, 1, 2, 3
|
||||
ADD1:
|
||||
.long 4, 4, 4, 4
|
||||
|
||||
@@ -50,14 +50,16 @@
|
||||
#define _ASM
|
||||
#include <sys/asm_linkage.h>
|
||||
|
||||
/* Windows userland links with OpenSSL */
|
||||
#if !defined (_WIN32) || defined (_KERNEL)
|
||||
|
||||
.extern gcm_avx_can_use_movbe
|
||||
|
||||
.text
|
||||
|
||||
#ifdef HAVE_MOVBE
|
||||
.type _aesni_ctr32_ghash_6x,@function
|
||||
.align 32
|
||||
_aesni_ctr32_ghash_6x:
|
||||
.align 32
|
||||
FUNCTION(_aesni_ctr32_ghash_6x)
|
||||
.cfi_startproc
|
||||
ENDBR
|
||||
vmovdqu 32(%r11),%xmm2
|
||||
@@ -369,12 +371,11 @@ _aesni_ctr32_ghash_6x:
|
||||
|
||||
RET
|
||||
.cfi_endproc
|
||||
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
|
||||
SET_SIZE(_aesni_ctr32_ghash_6x)
|
||||
#endif /* ifdef HAVE_MOVBE */
|
||||
|
||||
.type _aesni_ctr32_ghash_no_movbe_6x,@function
|
||||
.align 32
|
||||
_aesni_ctr32_ghash_no_movbe_6x:
|
||||
.align 32
|
||||
FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
|
||||
.cfi_startproc
|
||||
ENDBR
|
||||
vmovdqu 32(%r11),%xmm2
|
||||
@@ -698,12 +699,9 @@ _aesni_ctr32_ghash_no_movbe_6x:
|
||||
|
||||
RET
|
||||
.cfi_endproc
|
||||
.size _aesni_ctr32_ghash_no_movbe_6x,.-_aesni_ctr32_ghash_no_movbe_6x
|
||||
SET_SIZE(_aesni_ctr32_ghash_no_movbe_6x)
|
||||
|
||||
.globl aesni_gcm_decrypt
|
||||
.type aesni_gcm_decrypt,@function
|
||||
.align 32
|
||||
aesni_gcm_decrypt:
|
||||
ENTRY_ALIGN(aesni_gcm_decrypt, 32)
|
||||
.cfi_startproc
|
||||
ENDBR
|
||||
xorq %r10,%r10
|
||||
@@ -818,10 +816,10 @@ aesni_gcm_decrypt:
|
||||
movq %r10,%rax
|
||||
RET
|
||||
.cfi_endproc
|
||||
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
|
||||
.type _aesni_ctr32_6x,@function
|
||||
.align 32
|
||||
_aesni_ctr32_6x:
|
||||
SET_SIZE(aesni_gcm_decrypt)
|
||||
|
||||
.align 32
|
||||
FUNCTION(_aesni_ctr32_6x)
|
||||
.cfi_startproc
|
||||
ENDBR
|
||||
vmovdqu 0-128(%rcx),%xmm4
|
||||
@@ -911,12 +909,9 @@ _aesni_ctr32_6x:
|
||||
vpxor %xmm4,%xmm14,%xmm14
|
||||
jmp .Loop_ctr32
|
||||
.cfi_endproc
|
||||
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
|
||||
SET_SIZE(_aesni_ctr32_6x)
|
||||
|
||||
.globl aesni_gcm_encrypt
|
||||
.type aesni_gcm_encrypt,@function
|
||||
.align 32
|
||||
aesni_gcm_encrypt:
|
||||
ENTRY_ALIGN(aesni_gcm_encrypt, 32)
|
||||
.cfi_startproc
|
||||
ENDBR
|
||||
xorq %r10,%r10
|
||||
@@ -1196,7 +1191,9 @@ aesni_gcm_encrypt:
|
||||
movq %r10,%rax
|
||||
RET
|
||||
.cfi_endproc
|
||||
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
|
||||
SET_SIZE(aesni_gcm_encrypt)
|
||||
|
||||
#endif /* !_WIN32 || _KERNEL */
|
||||
|
||||
/* Some utility routines */
|
||||
|
||||
@@ -1204,13 +1201,10 @@ aesni_gcm_encrypt:
|
||||
* clear all fpu registers
|
||||
* void clear_fpu_regs_avx(void);
|
||||
*/
|
||||
.globl clear_fpu_regs_avx
|
||||
.type clear_fpu_regs_avx,@function
|
||||
.align 32
|
||||
clear_fpu_regs_avx:
|
||||
ENTRY_ALIGN(clear_fpu_regs_avx, 32)
|
||||
vzeroall
|
||||
RET
|
||||
.size clear_fpu_regs_avx,.-clear_fpu_regs_avx
|
||||
SET_SIZE(clear_fpu_regs_avx)
|
||||
|
||||
/*
|
||||
* void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
|
||||
@@ -1219,25 +1213,19 @@ clear_fpu_regs_avx:
|
||||
* stores the result at `dst'. The XOR is performed using FPU registers,
|
||||
* so make sure FPU state is saved when running this in the kernel.
|
||||
*/
|
||||
.globl gcm_xor_avx
|
||||
.type gcm_xor_avx,@function
|
||||
.align 32
|
||||
gcm_xor_avx:
|
||||
ENTRY_ALIGN(gcm_xor_avx, 32)
|
||||
movdqu (%rdi), %xmm0
|
||||
movdqu (%rsi), %xmm1
|
||||
pxor %xmm1, %xmm0
|
||||
movdqu %xmm0, (%rsi)
|
||||
RET
|
||||
.size gcm_xor_avx,.-gcm_xor_avx
|
||||
SET_SIZE(gcm_xor_avx)
|
||||
|
||||
/*
|
||||
* Toggle a boolean_t value atomically and return the new value.
|
||||
* boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
|
||||
*/
|
||||
.globl atomic_toggle_boolean_nv
|
||||
.type atomic_toggle_boolean_nv,@function
|
||||
.align 32
|
||||
atomic_toggle_boolean_nv:
|
||||
ENTRY_ALIGN(atomic_toggle_boolean_nv, 32)
|
||||
xorl %eax, %eax
|
||||
lock
|
||||
xorl $1, (%rdi)
|
||||
@@ -1245,9 +1233,10 @@ atomic_toggle_boolean_nv:
|
||||
movl $1, %eax
|
||||
1:
|
||||
RET
|
||||
.size atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv
|
||||
SET_SIZE(atomic_toggle_boolean_nv)
|
||||
|
||||
SECTION_STATIC
|
||||
|
||||
.pushsection .rodata
|
||||
.align 64
|
||||
.Lbswap_mask:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
@@ -1261,7 +1250,6 @@ atomic_toggle_boolean_nv:
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 64
|
||||
.popsection
|
||||
|
||||
/* Mark the stack non-executable. */
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
|
||||
@@ -102,12 +102,13 @@
|
||||
|
||||
.text
|
||||
|
||||
.globl gcm_gmult_clmul
|
||||
.type gcm_gmult_clmul,@function
|
||||
.align 16
|
||||
gcm_gmult_clmul:
|
||||
/* Windows userland links with OpenSSL */
|
||||
#if !defined (_WIN32) || defined (_KERNEL)
|
||||
ENTRY_ALIGN(gcm_gmult_clmul, 16)
|
||||
|
||||
.cfi_startproc
|
||||
ENDBR
|
||||
|
||||
.L_gmult_clmul:
|
||||
movdqu (%rdi),%xmm0
|
||||
movdqa .Lbswap_mask(%rip),%xmm5
|
||||
@@ -155,12 +156,10 @@ gcm_gmult_clmul:
|
||||
movdqu %xmm0,(%rdi)
|
||||
RET
|
||||
.cfi_endproc
|
||||
.size gcm_gmult_clmul,.-gcm_gmult_clmul
|
||||
SET_SIZE(gcm_gmult_clmul)
|
||||
#endif /* !_WIN32 || _KERNEL */
|
||||
|
||||
.globl gcm_init_htab_avx
|
||||
.type gcm_init_htab_avx,@function
|
||||
.align 32
|
||||
gcm_init_htab_avx:
|
||||
ENTRY_ALIGN(gcm_init_htab_avx, 32)
|
||||
.cfi_startproc
|
||||
ENDBR
|
||||
vzeroupper
|
||||
@@ -269,21 +268,17 @@ gcm_init_htab_avx:
|
||||
vzeroupper
|
||||
RET
|
||||
.cfi_endproc
|
||||
.size gcm_init_htab_avx,.-gcm_init_htab_avx
|
||||
SET_SIZE(gcm_init_htab_avx)
|
||||
|
||||
.globl gcm_gmult_avx
|
||||
.type gcm_gmult_avx,@function
|
||||
.align 32
|
||||
gcm_gmult_avx:
|
||||
#if !defined (_WIN32) || defined (_KERNEL)
|
||||
ENTRY_ALIGN(gcm_gmult_avx, 32)
|
||||
.cfi_startproc
|
||||
ENDBR
|
||||
jmp .L_gmult_clmul
|
||||
.cfi_endproc
|
||||
.size gcm_gmult_avx,.-gcm_gmult_avx
|
||||
.globl gcm_ghash_avx
|
||||
.type gcm_ghash_avx,@function
|
||||
.align 32
|
||||
gcm_ghash_avx:
|
||||
SET_SIZE(gcm_gmult_avx)
|
||||
|
||||
ENTRY_ALIGN(gcm_ghash_avx, 32)
|
||||
.cfi_startproc
|
||||
ENDBR
|
||||
vzeroupper
|
||||
@@ -658,9 +653,11 @@ gcm_ghash_avx:
|
||||
vzeroupper
|
||||
RET
|
||||
.cfi_endproc
|
||||
.size gcm_ghash_avx,.-gcm_ghash_avx
|
||||
SET_SIZE(gcm_ghash_avx)
|
||||
|
||||
.pushsection .rodata
|
||||
#endif /* !_WIN32 || _KERNEL */
|
||||
|
||||
SECTION_STATIC
|
||||
.align 64
|
||||
.Lbswap_mask:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
@@ -671,13 +668,13 @@ gcm_ghash_avx:
|
||||
.L7_mask_poly:
|
||||
.long 7,0,450,0
|
||||
.align 64
|
||||
.type .Lrem_4bit,@object
|
||||
SET_OBJ(.Lrem_4bit)
|
||||
.Lrem_4bit:
|
||||
.long 0,0,0,471859200,0,943718400,0,610271232
|
||||
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
|
||||
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
|
||||
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
|
||||
.type .Lrem_8bit,@object
|
||||
SET_OBJ(.Lrem_8bit)
|
||||
.Lrem_8bit:
|
||||
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
|
||||
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
|
||||
@@ -714,7 +711,6 @@ gcm_ghash_avx:
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 64
|
||||
.popsection
|
||||
|
||||
/* Mark the stack non-executable. */
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
|
||||
@@ -2065,7 +2065,7 @@ SET_SIZE(SHA256TransformBlocks)
|
||||
|
||||
.section .rodata
|
||||
.align 64
|
||||
.type K256,@object
|
||||
SET_OBJ(K256)
|
||||
K256:
|
||||
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
||||
|
||||
@@ -2066,7 +2066,7 @@ SET_SIZE(SHA512TransformBlocks)
|
||||
|
||||
.section .rodata
|
||||
.align 64
|
||||
.type K512,@object
|
||||
SET_OBJ(K512)
|
||||
K512:
|
||||
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
|
||||
.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
|
||||
@@ -2110,6 +2110,7 @@ K512:
|
||||
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
|
||||
#endif /* !lint && !__lint */
|
||||
|
||||
#ifdef __ELF__
|
||||
#if defined(__ELF__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user