Mirror of https://git.proxmox.com/git/mirror_zfs.git (synced 2024-11-17 01:51:00 +03:00)
Restore ASMABI and other Unify work
Make sure all SHA2 transform functions have wrappers. For ASMABI to work, the calling convention must be consistent across all callers.

Reviewed-by: Tino Reichardt <milky-zfs@mcmilk.de>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Signed-off-by: Joergen Lundman <lundman@lundman.net>
Closes #14569
This commit is contained in:
parent 620a977f22
commit 47119d60ef
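For readers unfamiliar with the pattern: ASMABI marks the hand-written assembly routines with an explicit calling convention (on toolchains whose default ABI differs from the System V one the assembly was written for), and every C call site reaches the assembly through a small static inline wrapper so the attribute is applied consistently. The sketch below illustrates the idea with the SHA-256 transform; the ASMABI definition and the simplified ops struct are stand-ins for illustration, not the exact definitions from sys/asm_linkage.h and sha2/sha2_impl.h.

/* Illustrative sketch of the ASMABI wrapper pattern used in this commit. */
#include <stdint.h>
#include <stddef.h>

/*
 * Stand-in for the real ASMABI macro from sys/asm_linkage.h: on targets
 * whose default C ABI is not System V (e.g. x86-64 Windows), force the
 * SysV convention that the assembly implements.
 */
#if defined(_WIN32) && defined(__x86_64__)
#define ASMABI __attribute__((sysv_abi))
#else
#define ASMABI
#endif

/* The assembly routine is declared once, with the explicit ABI. */
extern void ASMABI zfs_sha256_transform_x64(uint32_t s[8],
    const void *data, size_t blks);

/*
 * All other code calls through this wrapper. The wrapper itself uses the
 * platform's normal ABI, so storing it in a function-pointer table (as
 * sha256_x64_impl does) cannot silently drop the attribute.
 */
static inline void
tf_sha256_transform_x64(uint32_t s[8], const void *d, size_t b)
{
	zfs_sha256_transform_x64(s, d, b);
}

/* Simplified stand-in for the sha256_ops_t table from sha2_impl.h. */
typedef struct {
	void (*transform)(uint32_t s[8], const void *, size_t);
	const char *name;
} sha256_ops_sketch_t;

static const sha256_ops_sketch_t sha256_x64_sketch = {
	.transform = tf_sha256_transform_x64,
	.name = "x64"
};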
@@ -34,15 +34,15 @@
 (defined(__x86_64) && defined(HAVE_SSE2)) || \
 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
 
-extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
 uint64_t counter, uint8_t flags);
 
-extern void zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
 uint64_t counter, uint8_t flags, uint8_t out[64]);
 
-extern void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
+extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
 size_t num_inputs, size_t blocks, const uint32_t key[8],
 uint64_t counter, boolean_t increment_counter, uint8_t flags,
 uint8_t flags_start, uint8_t flags_end, uint8_t *out);
@@ -100,15 +100,15 @@ const blake3_ops_t blake3_sse2_impl = {
 (defined(__x86_64) && defined(HAVE_SSE2)) || \
 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
 
-extern void zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
 uint64_t counter, uint8_t flags);
 
-extern void zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
 uint64_t counter, uint8_t flags, uint8_t out[64]);
 
-extern void zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
+extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
 size_t num_inputs, size_t blocks, const uint32_t key[8],
 uint64_t counter, boolean_t increment_counter, uint8_t flags,
 uint8_t flags_start, uint8_t flags_end, uint8_t *out);
@@ -163,7 +163,7 @@ const blake3_ops_t blake3_sse41_impl = {
 #endif
 
 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
-extern void zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
+extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
 size_t num_inputs, size_t blocks, const uint32_t key[8],
 uint64_t counter, boolean_t increment_counter, uint8_t flags,
 uint8_t flags_start, uint8_t flags_end, uint8_t *out);
@@ -196,15 +196,15 @@ blake3_avx2_impl = {
 #endif
 
 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
-extern void zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
 uint64_t counter, uint8_t flags);
 
-extern void zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
 uint64_t counter, uint8_t flags, uint8_t out[64]);
 
-extern void zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
+extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
 size_t num_inputs, size_t blocks, const uint32_t key[8],
 uint64_t counter, boolean_t increment_counter, uint8_t flags,
 uint8_t flags_start, uint8_t flags_end, uint8_t *out);

@@ -29,9 +29,10 @@
 #include <sys/simd.h>
 
 #include <sha2/sha2_impl.h>
+#include <sys/asm_linkage.h>
 
 #define TF(E, N) \
-extern void E(uint32_t s[8], const void *, size_t); \
+extern void ASMABI E(uint32_t s[8], const void *, size_t); \
 static inline void N(uint32_t s[8], const void *d, size_t b) { \
 kfpu_begin(); E(s, d, b); kfpu_end(); \
 }
@@ -44,10 +45,19 @@ static inline boolean_t sha2_is_supported(void)
 
 #if defined(__x86_64)
 
-extern void zfs_sha256_transform_x64(uint32_t s[8], const void *, size_t);
+/* Users of ASMABI requires all calls to be from wrappers */
+extern void ASMABI
+zfs_sha256_transform_x64(uint32_t s[8], const void *, size_t);
+
+static inline void
+tf_sha256_transform_x64(uint32_t s[8], const void *d, size_t b)
+{
+zfs_sha256_transform_x64(s, d, b);
+}
+
 const sha256_ops_t sha256_x64_impl = {
 .is_supported = sha2_is_supported,
-.transform = zfs_sha256_transform_x64,
+.transform = tf_sha256_transform_x64,
 .name = "x64"
 };
 

@@ -29,9 +29,10 @@
 #include <sys/simd.h>
 
 #include <sha2/sha2_impl.h>
+#include <sys/asm_linkage.h>
 
 #define TF(E, N) \
-extern void E(uint64_t s[8], const void *, size_t); \
+extern void ASMABI E(uint64_t s[8], const void *, size_t); \
 static inline void N(uint64_t s[8], const void *d, size_t b) { \
 kfpu_begin(); E(s, d, b); kfpu_end(); \
 }
@@ -44,10 +45,18 @@ static inline boolean_t sha2_is_supported(void)
 
 #if defined(__x86_64)
 
-extern void zfs_sha512_transform_x64(uint64_t s[8], const void *, size_t);
+/* Users of ASMABI requires all calls to be from wrappers */
+extern void ASMABI
+zfs_sha512_transform_x64(uint64_t s[8], const void *, size_t);
+
+static inline void
+tf_sha512_transform_x64(uint64_t s[8], const void *d, size_t b)
+{
+zfs_sha512_transform_x64(s, d, b);
+}
 const sha512_ops_t sha512_x64_impl = {
 .is_supported = sha2_is_supported,
-.transform = zfs_sha512_transform_x64,
+.transform = tf_sha512_transform_x64,
 .name = "x64"
 };
 

@@ -26,8 +26,8 @@
 
 SECTION_STATIC
 
-.align 64
-.type K256,@object
+.balign 64
+SET_OBJ(K256)
 K256:
 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@@ -105,7 +105,7 @@ ENTRY_ALIGN(zfs_sha256_transform_x64, 16)
 movl 24(%rdi),%r10d
 movl 28(%rdi),%r11d
 jmp .Lloop
-.align 16
+.balign 16
 .Lloop:
 movl %ebx,%edi
 leaq K256(%rip),%rbp
@@ -622,7 +622,7 @@ ENTRY_ALIGN(zfs_sha256_transform_x64, 16)
 addl %r12d,%eax
 leaq 20(%rbp),%rbp
 jmp .Lrounds_16_xx
-.align 16
+.balign 16
 .Lrounds_16_xx:
 movl 4(%rsp),%r13d
 movl 56(%rsp),%r15d
@@ -1436,7 +1436,7 @@ ENTRY_ALIGN(zfs_sha256_transform_shani, 64)
 punpcklqdq %xmm0,%xmm2
 jmp .Loop_shani
 
-.align 16
+.balign 16
 .Loop_shani:
 movdqu (%rsi),%xmm3
 movdqu 16(%rsi),%xmm4
@@ -1666,7 +1666,7 @@ ENTRY_ALIGN(zfs_sha256_transform_ssse3, 64)
 movl 28(%rdi),%r11d
 
 jmp .Lloop_ssse3
-.align 16
+.balign 16
 .Lloop_ssse3:
 movdqa K256+512(%rip),%xmm7
 movdqu 0(%rsi),%xmm0
@@ -1696,7 +1696,7 @@ ENTRY_ALIGN(zfs_sha256_transform_ssse3, 64)
 movl %r8d,%r13d
 jmp .Lssse3_00_47
 
-.align 16
+.balign 16
 .Lssse3_00_47:
 subq $-128,%rbp
 rorl $14,%r13d
@@ -2779,7 +2779,7 @@ ENTRY_ALIGN(zfs_sha256_transform_avx, 64)
 vmovdqa K256+512+32(%rip),%xmm8
 vmovdqa K256+512+64(%rip),%xmm9
 jmp .Lloop_avx
-.align 16
+.balign 16
 .Lloop_avx:
 vmovdqa K256+512(%rip),%xmm7
 vmovdqu 0(%rsi),%xmm0
@@ -2805,7 +2805,7 @@ ENTRY_ALIGN(zfs_sha256_transform_avx, 64)
 movl %r8d,%r13d
 jmp .Lavx_00_47
 
-.align 16
+.balign 16
 .Lavx_00_47:
 subq $-128,%rbp
 vpalignr $4,%xmm0,%xmm1,%xmm4
@@ -3858,7 +3858,7 @@ ENTRY_ALIGN(zfs_sha256_transform_avx2, 64)
 vmovdqa K256+512+32(%rip),%ymm8
 vmovdqa K256+512+64(%rip),%ymm9
 jmp .Loop_avx2
-.align 16
+.balign 16
 .Loop_avx2:
 vmovdqa K256+512(%rip),%ymm7
 vmovdqu -64+0(%rsi),%xmm0
@@ -3900,7 +3900,7 @@ ENTRY_ALIGN(zfs_sha256_transform_avx2, 64)
 subq $-32*4,%rbp
 jmp .Lavx2_00_47
 
-.align 16
+.balign 16
 .Lavx2_00_47:
 leaq -64(%rsp),%rsp
 .cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08
@@ -4842,7 +4842,7 @@ ENTRY_ALIGN(zfs_sha256_transform_avx2, 64)
 xorl %ecx,%edi
 movl %r9d,%r12d
 jmp .Lower_avx2
-.align 16
+.balign 16
 .Lower_avx2:
 addl 0+16(%rbp),%r11d
 andl %r8d,%r12d

@@ -26,8 +26,8 @@
 
 SECTION_STATIC
 
-.align 64
-.type K512,@object
+.balign 64
+SET_OBJ(K512)
 K512:
 .quad 0x428a2f98d728ae22,0x7137449123ef65cd
 .quad 0x428a2f98d728ae22,0x7137449123ef65cd
@@ -148,7 +148,7 @@ ENTRY_ALIGN(zfs_sha512_transform_x64, 16)
 movq 48(%rdi),%r10
 movq 56(%rdi),%r11
 jmp .Lloop
-.align 16
+.balign 16
 .Lloop:
 movq %rbx,%rdi
 leaq K512(%rip),%rbp
@@ -665,7 +665,7 @@ ENTRY_ALIGN(zfs_sha512_transform_x64, 16)
 addq %r12,%rax
 leaq 24(%rbp),%rbp
 jmp .Lrounds_16_xx
-.align 16
+.balign 16
 .Lrounds_16_xx:
 movq 8(%rsp),%r13
 movq 112(%rsp),%r15
@@ -1501,7 +1501,7 @@ ENTRY_ALIGN(zfs_sha512_transform_avx, 64)
 movq 48(%rdi),%r10
 movq 56(%rdi),%r11
 jmp .Lloop_avx
-.align 16
+.balign 16
 .Lloop_avx:
 vmovdqa K512+1280(%rip),%xmm11
 vmovdqu 0(%rsi),%xmm0
@@ -1543,7 +1543,7 @@ ENTRY_ALIGN(zfs_sha512_transform_avx, 64)
 movq %r8,%r13
 jmp .Lavx_00_47
 
-.align 16
+.balign 16
 .Lavx_00_47:
 addq $256,%rbp
 vpalignr $8,%xmm0,%xmm1,%xmm8
@@ -2670,7 +2670,7 @@ ENTRY_ALIGN(zfs_sha512_transform_avx2, 64)
 movq 48(%rdi),%r10
 movq 56(%rdi),%r11
 jmp .Loop_avx2
-.align 16
+.balign 16
 .Loop_avx2:
 vmovdqu -128(%rsi),%xmm0
 vmovdqu -128+16(%rsi),%xmm1
@@ -2732,7 +2732,7 @@ ENTRY_ALIGN(zfs_sha512_transform_avx2, 64)
 addq $32*8,%rbp
 jmp .Lavx2_00_47
 
-.align 16
+.balign 16
 .Lavx2_00_47:
 leaq -128(%rsp),%rsp
 .cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
@@ -3750,7 +3750,7 @@ ENTRY_ALIGN(zfs_sha512_transform_avx2, 64)
 xorq %rcx,%rdi
 movq %r9,%r12
 jmp .Lower_avx2
-.align 16
+.balign 16
 .Lower_avx2:
 addq 0+16(%rbp),%r11
 andq %r8,%r12