mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-29 12:29:35 +03:00
31b160f0a6
Currently SIMD accelerated AES-GCM performance is limited by two factors: a. The need to disable preemption and interrupts and save the FPU state before using it and to do the reverse when done. Due to the way the code is organized (see (b) below) we have to pay this price twice for each 16 byte GCM block processed. b. Most processing is done in C, operating on single GCM blocks. The use of SIMD instructions is limited to the AES encryption of the counter block (AES-NI) and the Galois multiplication (PCLMULQDQ). This leads to the FPU not being fully utilized for crypto operations. To solve (a) we do crypto processing in larger chunks while owning the FPU. An `icp_gcm_avx_chunk_size` module parameter was introduced to make this chunk size tweakable. It defaults to 32 KiB. This step alone roughly doubles performance. (b) is tackled by porting and using the highly optimized OpenSSL AES-GCM assembler routines, which do all the processing (CTR, AES, GMULT) in a single routine. Both steps together result in up to 32x reduction of the time spent in the en/decryption routines, leading to an approximately 12x throughput increase for large (128 KiB) blocks. Lastly, this commit changes the default encryption algorithm from AES-CCM to AES-GCM when setting the `encryption=on` property. Reviewed-By: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-By: Jason King <jason.king@joyent.com> Reviewed-By: Tom Caputi <tcaputi@datto.com> Reviewed-By: Richard Laager <rlaager@wiktel.com> Signed-off-by: Attila Fülöp <attila@fueloep.org> Closes #9749
893 lines
22 KiB
ArmAsm
893 lines
22 KiB
ArmAsm
# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
# this file except in compliance with the License. You can obtain a copy
|
|
# in the file LICENSE in the source distribution or at
|
|
# https://www.openssl.org/source/license.html
|
|
|
|
#
|
|
# ====================================================================
|
|
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
|
# project. The module is, however, dual licensed under OpenSSL and
|
|
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
|
# details see http://www.openssl.org/~appro/cryptogams/.
|
|
# ====================================================================
|
|
#
|
|
#
|
|
# AES-NI-CTR+GHASH stitch.
|
|
#
|
|
# February 2013
|
|
#
|
|
# OpenSSL GCM implementation is organized in such way that its
|
|
# performance is rather close to the sum of its streamed components,
|
|
# in the context parallelized AES-NI CTR and modulo-scheduled
|
|
# PCLMULQDQ-enabled GHASH. Unfortunately, as no stitch implementation
|
|
# was observed to perform significantly better than the sum of the
|
|
# components on contemporary CPUs, the effort was deemed impossible to
|
|
# justify. This module is based on combination of Intel submissions,
|
|
# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
|
|
# Locktyukhin of Intel Corp. who verified that it reduces shuffles
|
|
# pressure with notable relative improvement, achieving 1.0 cycle per
|
|
# byte processed with 128-bit key on Haswell processor, 0.74 - on
|
|
# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled
|
|
# measurements for favourable packet size, one divisible by 96.
|
|
# Applications using the EVP interface will observe a few percent
|
|
# worse performance.]
|
|
#
|
|
# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP).
|
|
#
|
|
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
|
|
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
|
|
|
|
# Generated once from
|
|
# https://github.com/openssl/openssl/blob/5ffc3324/crypto/modes/asm/aesni-gcm-x86_64.pl
|
|
# and modified for ICP. Modifications are kept at a bare minimum to ease later
|
|
# upstream merges.
|
|
|
|
#if defined(__x86_64__) && defined(HAVE_AVX) && \
|
|
defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) && defined(HAVE_MOVBE)
|
|
|
|
.text
|
|
|
|
/*
 * _aesni_ctr32_ghash_6x -- internal stitched AES-CTR + GHASH loop.
 *
 * Not callable from C: every input is passed in registers by
 * aesni_gcm_decrypt() / aesni_gcm_encrypt() in this file.  Each pass of
 * .Loop6x pushes six counter blocks through the AES rounds while the
 * carry-less multiplications for six previously queued blocks are
 * interleaved between the vaesenc instructions.  The instruction order is
 * hand scheduled; keep it intact when editing.
 *
 * Register use, as read from the code below:
 *   %rdi   source pointer, XORed with the key stream; advanced by 96
 *   %rsi   destination pointer; advanced by 96
 *   %rdx   number of 16-byte blocks still to process
 *   %rcx   round keys, biased by +128 (accessed as N-128(%rcx))
 *   %r8    counter block in memory; the next counter is written back
 *   %r9    table of hash-key values, accessed as N-32(%r9)
 *          (presumably precomputed powers of H -- confirm against caller)
 *   %r10   running byte count, incremented by 0x60 per pass
 *   %r11   .Lbswap_mask; the constants that follow it are read at
 *          16(%r11), 32(%r11), 48(%r11) and 64(%r11)
 *   %r14   address of the data that is hashed next
 *   %r15   upper bound for %r14
 *   %ebx   dword loaded by the caller from 12(%r8) (tail of the counter)
 *   %ebp   round count (10, 12 or 14)
 *   %xmm1  current counter block
 *   %xmm7 and the 16-byte slots 48..112(%rsp) of the caller:
 *          byte-swapped blocks queued for hashing
 *   %xmm8  hash accumulator (in/out)
 *
 * On return %xmm9-%xmm14 hold the last six output blocks, which are NOT
 * stored here (both callers write them to -96(%rsi)..-16(%rsi)).
 * %r12, %r13 and the xmm registers are clobbered.
 *
 * Stack slots are addressed as N+8(%rsp): the extra 8 skips the return
 * address pushed by the call instruction.
 */
.type	_aesni_ctr32_ghash_6x,@function
.p2align	5
_aesni_ctr32_ghash_6x:
	vmovdqu	32(%r11), %xmm2			// .Lone_msb
	subq	$6, %rdx
	vpxor	%xmm4, %xmm4, %xmm4
	vmovdqu	0-128(%rcx), %xmm15		// round key 0
	vpaddb	%xmm2, %xmm1, %xmm10		// counter+1 .. counter+5
	vpaddb	%xmm2, %xmm10, %xmm11
	vpaddb	%xmm2, %xmm11, %xmm12
	vpaddb	%xmm2, %xmm12, %xmm13
	vpaddb	%xmm2, %xmm13, %xmm14
	vpxor	%xmm15, %xmm1, %xmm9
	vmovdqu	%xmm4, 16+8(%rsp)		// zero the carried partial product
	jmp	.Loop6x

.p2align	5
.Loop6x:
	/*
	 * 0x06000000 == 6 << 24: adds 6 to the top byte of %ebx, which is
	 * byte 15 of the counter block.  A carry means the byte-wise vpaddb
	 * increments would wrap, so the slow path is taken instead.
	 */
	addl	$0x06000000, %ebx
	jc	.Lhandle_ctr32
	vmovdqu	0-32(%r9), %xmm3
	vpaddb	%xmm2, %xmm14, %xmm1
	vpxor	%xmm15, %xmm10, %xmm10
	vpxor	%xmm15, %xmm11, %xmm11

.Lresume_ctr32:
	vmovdqu	%xmm1, (%r8)			// publish the next counter block
	vpclmulqdq	$0x10, %xmm3, %xmm7, %xmm5
	vpxor	%xmm15, %xmm12, %xmm12
	vmovups	16-128(%rcx), %xmm2		// round key 1
	vpclmulqdq	$0x01, %xmm3, %xmm7, %xmm6
	xorq	%r12, %r12
	cmpq	%r14, %r15			// CF clear when %r15 >= %r14

	vaesenc	%xmm2, %xmm9, %xmm9
	vmovdqu	48+8(%rsp), %xmm0
	vpxor	%xmm15, %xmm13, %xmm13
	vpclmulqdq	$0x00, %xmm3, %xmm7, %xmm1
	vaesenc	%xmm2, %xmm10, %xmm10
	vpxor	%xmm15, %xmm14, %xmm14
	setnc	%r12b				// flags still from the cmpq above
	vpclmulqdq	$0x11, %xmm3, %xmm7, %xmm7
	vaesenc	%xmm2, %xmm11, %xmm11
	vmovdqu	16-32(%r9), %xmm3
	negq	%r12
	vaesenc	%xmm2, %xmm12, %xmm12
	vpxor	%xmm5, %xmm6, %xmm6
	vpclmulqdq	$0x00, %xmm3, %xmm0, %xmm5
	vpxor	%xmm4, %xmm8, %xmm8
	vaesenc	%xmm2, %xmm13, %xmm13
	vpxor	%xmm5, %xmm1, %xmm4
	andq	$0x60, %r12			// 0x60 if %r15 >= %r14, else 0
	vmovups	32-128(%rcx), %xmm15		// round key 2
	vpclmulqdq	$0x10, %xmm3, %xmm0, %xmm1
	vaesenc	%xmm2, %xmm14, %xmm14

	vpclmulqdq	$0x01, %xmm3, %xmm0, %xmm2
	leaq	(%r14,%r12,1), %r14		// advance the hash input pointer
	vaesenc	%xmm15, %xmm9, %xmm9
	vpxor	16+8(%rsp), %xmm8, %xmm8
	vpclmulqdq	$0x11, %xmm3, %xmm0, %xmm3
	vmovdqu	64+8(%rsp), %xmm0
	vaesenc	%xmm15, %xmm10, %xmm10
	movbeq	88(%r14), %r13			// byte-swapped loads of the next
	vaesenc	%xmm15, %xmm11, %xmm11
	movbeq	80(%r14), %r12			// 96 bytes to hash, queued on the
	vaesenc	%xmm15, %xmm12, %xmm12
	movq	%r13, 32+8(%rsp)		// stack for the following pass
	vaesenc	%xmm15, %xmm13, %xmm13
	movq	%r12, 40+8(%rsp)
	vmovdqu	48-32(%r9), %xmm5
	vaesenc	%xmm15, %xmm14, %xmm14

	vmovups	48-128(%rcx), %xmm15		// round key 3
	vpxor	%xmm1, %xmm6, %xmm6
	vpclmulqdq	$0x00, %xmm5, %xmm0, %xmm1
	vaesenc	%xmm15, %xmm9, %xmm9
	vpxor	%xmm2, %xmm6, %xmm6
	vpclmulqdq	$0x10, %xmm5, %xmm0, %xmm2
	vaesenc	%xmm15, %xmm10, %xmm10
	vpxor	%xmm3, %xmm7, %xmm7
	vpclmulqdq	$0x01, %xmm5, %xmm0, %xmm3
	vaesenc	%xmm15, %xmm11, %xmm11
	vpclmulqdq	$0x11, %xmm5, %xmm0, %xmm5
	vmovdqu	80+8(%rsp), %xmm0
	vaesenc	%xmm15, %xmm12, %xmm12
	vaesenc	%xmm15, %xmm13, %xmm13
	vpxor	%xmm1, %xmm4, %xmm4
	vmovdqu	64-32(%r9), %xmm1
	vaesenc	%xmm15, %xmm14, %xmm14

	vmovups	64-128(%rcx), %xmm15		// round key 4
	vpxor	%xmm2, %xmm6, %xmm6
	vpclmulqdq	$0x00, %xmm1, %xmm0, %xmm2
	vaesenc	%xmm15, %xmm9, %xmm9
	vpxor	%xmm3, %xmm6, %xmm6
	vpclmulqdq	$0x10, %xmm1, %xmm0, %xmm3
	vaesenc	%xmm15, %xmm10, %xmm10
	movbeq	72(%r14), %r13
	vpxor	%xmm5, %xmm7, %xmm7
	vpclmulqdq	$0x01, %xmm1, %xmm0, %xmm5
	vaesenc	%xmm15, %xmm11, %xmm11
	movbeq	64(%r14), %r12
	vpclmulqdq	$0x11, %xmm1, %xmm0, %xmm1
	vmovdqu	96+8(%rsp), %xmm0
	vaesenc	%xmm15, %xmm12, %xmm12
	movq	%r13, 48+8(%rsp)
	vaesenc	%xmm15, %xmm13, %xmm13
	movq	%r12, 56+8(%rsp)
	vpxor	%xmm2, %xmm4, %xmm4
	vmovdqu	96-32(%r9), %xmm2
	vaesenc	%xmm15, %xmm14, %xmm14

	vmovups	80-128(%rcx), %xmm15		// round key 5
	vpxor	%xmm3, %xmm6, %xmm6
	vpclmulqdq	$0x00, %xmm2, %xmm0, %xmm3
	vaesenc	%xmm15, %xmm9, %xmm9
	vpxor	%xmm5, %xmm6, %xmm6
	vpclmulqdq	$0x10, %xmm2, %xmm0, %xmm5
	vaesenc	%xmm15, %xmm10, %xmm10
	movbeq	56(%r14), %r13
	vpxor	%xmm1, %xmm7, %xmm7
	vpclmulqdq	$0x01, %xmm2, %xmm0, %xmm1
	vpxor	112+8(%rsp), %xmm8, %xmm8
	vaesenc	%xmm15, %xmm11, %xmm11
	movbeq	48(%r14), %r12
	vpclmulqdq	$0x11, %xmm2, %xmm0, %xmm2
	vaesenc	%xmm15, %xmm12, %xmm12
	movq	%r13, 64+8(%rsp)
	vaesenc	%xmm15, %xmm13, %xmm13
	movq	%r12, 72+8(%rsp)
	vpxor	%xmm3, %xmm4, %xmm4
	vmovdqu	112-32(%r9), %xmm3
	vaesenc	%xmm15, %xmm14, %xmm14

	vmovups	96-128(%rcx), %xmm15		// round key 6
	vpxor	%xmm5, %xmm6, %xmm6
	vpclmulqdq	$0x10, %xmm3, %xmm8, %xmm5
	vaesenc	%xmm15, %xmm9, %xmm9
	vpxor	%xmm1, %xmm6, %xmm6
	vpclmulqdq	$0x01, %xmm3, %xmm8, %xmm1
	vaesenc	%xmm15, %xmm10, %xmm10
	movbeq	40(%r14), %r13
	vpxor	%xmm2, %xmm7, %xmm7
	vpclmulqdq	$0x00, %xmm3, %xmm8, %xmm2
	vaesenc	%xmm15, %xmm11, %xmm11
	movbeq	32(%r14), %r12
	vpclmulqdq	$0x11, %xmm3, %xmm8, %xmm8
	vaesenc	%xmm15, %xmm12, %xmm12
	movq	%r13, 80+8(%rsp)
	vaesenc	%xmm15, %xmm13, %xmm13
	movq	%r12, 88+8(%rsp)
	vpxor	%xmm5, %xmm6, %xmm6
	vaesenc	%xmm15, %xmm14, %xmm14
	vpxor	%xmm1, %xmm6, %xmm6

	vmovups	112-128(%rcx), %xmm15		// round key 7
	vpslldq	$8, %xmm6, %xmm5
	vpxor	%xmm2, %xmm4, %xmm4
	vmovdqu	16(%r11), %xmm3			// .Lpoly

	vaesenc	%xmm15, %xmm9, %xmm9
	vpxor	%xmm8, %xmm7, %xmm7
	vaesenc	%xmm15, %xmm10, %xmm10
	vpxor	%xmm5, %xmm4, %xmm4
	movbeq	24(%r14), %r13
	vaesenc	%xmm15, %xmm11, %xmm11
	movbeq	16(%r14), %r12
	vpalignr	$8, %xmm4, %xmm4, %xmm0
	vpclmulqdq	$0x10, %xmm3, %xmm4, %xmm4
	movq	%r13, 96+8(%rsp)
	vaesenc	%xmm15, %xmm12, %xmm12
	movq	%r12, 104+8(%rsp)
	vaesenc	%xmm15, %xmm13, %xmm13
	vmovups	128-128(%rcx), %xmm1		// round key 8
	vaesenc	%xmm15, %xmm14, %xmm14

	vaesenc	%xmm1, %xmm9, %xmm9
	vmovups	144-128(%rcx), %xmm15		// round key 9
	vaesenc	%xmm1, %xmm10, %xmm10
	vpsrldq	$8, %xmm6, %xmm6
	vaesenc	%xmm1, %xmm11, %xmm11
	vpxor	%xmm6, %xmm7, %xmm7
	vaesenc	%xmm1, %xmm12, %xmm12
	vpxor	%xmm0, %xmm4, %xmm4
	movbeq	8(%r14), %r13
	vaesenc	%xmm1, %xmm13, %xmm13
	movbeq	0(%r14), %r12
	vaesenc	%xmm1, %xmm14, %xmm14
	vmovups	160-128(%rcx), %xmm1		// round key 10
	cmpl	$12, %ebp	// ICP uses 10,12,14 not 9,11,13 for rounds.
	jb	.Lenc_tail			// 10 rounds: finish now

	vaesenc	%xmm15, %xmm9, %xmm9
	vaesenc	%xmm15, %xmm10, %xmm10
	vaesenc	%xmm15, %xmm11, %xmm11
	vaesenc	%xmm15, %xmm12, %xmm12
	vaesenc	%xmm15, %xmm13, %xmm13
	vaesenc	%xmm15, %xmm14, %xmm14

	vaesenc	%xmm1, %xmm9, %xmm9
	vaesenc	%xmm1, %xmm10, %xmm10
	vaesenc	%xmm1, %xmm11, %xmm11
	vaesenc	%xmm1, %xmm12, %xmm12
	vaesenc	%xmm1, %xmm13, %xmm13
	vmovups	176-128(%rcx), %xmm15		// round key 11
	vaesenc	%xmm1, %xmm14, %xmm14
	vmovups	192-128(%rcx), %xmm1		// round key 12
	cmpl	$14, %ebp	// ICP does not zero key schedule.
	jb	.Lenc_tail			// 12 rounds: finish now

	vaesenc	%xmm15, %xmm9, %xmm9
	vaesenc	%xmm15, %xmm10, %xmm10
	vaesenc	%xmm15, %xmm11, %xmm11
	vaesenc	%xmm15, %xmm12, %xmm12
	vaesenc	%xmm15, %xmm13, %xmm13
	vaesenc	%xmm15, %xmm14, %xmm14

	vaesenc	%xmm1, %xmm9, %xmm9
	vaesenc	%xmm1, %xmm10, %xmm10
	vaesenc	%xmm1, %xmm11, %xmm11
	vaesenc	%xmm1, %xmm12, %xmm12
	vaesenc	%xmm1, %xmm13, %xmm13
	vmovups	208-128(%rcx), %xmm15		// round key 13
	vaesenc	%xmm1, %xmm14, %xmm14
	vmovups	224-128(%rcx), %xmm1		// round key 14
	jmp	.Lenc_tail

	/*
	 * Slow path for the counter increment: byte-swap the counter with
	 * .Lbswap_mask, add with 32-bit lanes (.Lone_lsb once, then
	 * .Ltwo_lsb steps) and swap each result back.
	 */
.p2align	5
.Lhandle_ctr32:
	vmovdqu	(%r11), %xmm0			// .Lbswap_mask
	vpshufb	%xmm0, %xmm1, %xmm6
	vmovdqu	48(%r11), %xmm5			// .Ltwo_lsb
	vpaddd	64(%r11), %xmm6, %xmm10		// .Lone_lsb
	vpaddd	%xmm5, %xmm6, %xmm11
	vmovdqu	0-32(%r9), %xmm3
	vpaddd	%xmm5, %xmm10, %xmm12
	vpshufb	%xmm0, %xmm10, %xmm10
	vpaddd	%xmm5, %xmm11, %xmm13
	vpshufb	%xmm0, %xmm11, %xmm11
	vpxor	%xmm15, %xmm10, %xmm10
	vpaddd	%xmm5, %xmm12, %xmm14
	vpshufb	%xmm0, %xmm12, %xmm12
	vpxor	%xmm15, %xmm11, %xmm11
	vpaddd	%xmm5, %xmm13, %xmm1
	vpshufb	%xmm0, %xmm13, %xmm13
	vpshufb	%xmm0, %xmm14, %xmm14
	vpshufb	%xmm0, %xmm1, %xmm1
	jmp	.Lresume_ctr32

	/*
	 * Last two AES rounds.  %xmm15 is the second-to-last round key and
	 * %xmm1 the last one; XORing the last key with the source block
	 * first lets vaesenclast produce source ^ keystream directly.
	 */
.p2align	5
.Lenc_tail:
	vaesenc	%xmm15, %xmm9, %xmm9
	vmovdqu	%xmm7, 16+8(%rsp)
	vpalignr	$8, %xmm4, %xmm4, %xmm8
	vaesenc	%xmm15, %xmm10, %xmm10
	vpclmulqdq	$0x10, %xmm3, %xmm4, %xmm4
	vpxor	0(%rdi), %xmm1, %xmm2
	vaesenc	%xmm15, %xmm11, %xmm11
	vpxor	16(%rdi), %xmm1, %xmm0
	vaesenc	%xmm15, %xmm12, %xmm12
	vpxor	32(%rdi), %xmm1, %xmm5
	vaesenc	%xmm15, %xmm13, %xmm13
	vpxor	48(%rdi), %xmm1, %xmm6
	vaesenc	%xmm15, %xmm14, %xmm14
	vpxor	64(%rdi), %xmm1, %xmm7
	vpxor	80(%rdi), %xmm1, %xmm3
	vmovdqu	(%r8), %xmm1			// reload the counter stored above

	vaesenclast	%xmm2, %xmm9, %xmm9
	vmovdqu	32(%r11), %xmm2			// .Lone_msb
	vaesenclast	%xmm0, %xmm10, %xmm10
	vpaddb	%xmm2, %xmm1, %xmm0
	movq	%r13, 112+8(%rsp)
	leaq	96(%rdi), %rdi
	vaesenclast	%xmm5, %xmm11, %xmm11
	vpaddb	%xmm2, %xmm0, %xmm5
	movq	%r12, 120+8(%rsp)
	leaq	96(%rsi), %rsi
	vmovdqu	0-128(%rcx), %xmm15		// round key 0 for the next pass
	vaesenclast	%xmm6, %xmm12, %xmm12
	vpaddb	%xmm2, %xmm5, %xmm6
	vaesenclast	%xmm7, %xmm13, %xmm13
	vpaddb	%xmm2, %xmm6, %xmm7
	vaesenclast	%xmm3, %xmm14, %xmm14
	vpaddb	%xmm2, %xmm7, %xmm3

	addq	$0x60, %r10			// 96 more bytes done
	subq	$0x6, %rdx
	jc	.L6x_done			// fewer than 6 blocks were left

	/* Store this pass and rotate the next counters into place. */
	vmovups	%xmm9, -96(%rsi)
	vpxor	%xmm15, %xmm1, %xmm9
	vmovups	%xmm10, -80(%rsi)
	vmovdqa	%xmm0, %xmm10
	vmovups	%xmm11, -64(%rsi)
	vmovdqa	%xmm5, %xmm11
	vmovups	%xmm12, -48(%rsi)
	vmovdqa	%xmm6, %xmm12
	vmovups	%xmm13, -32(%rsi)
	vmovdqa	%xmm7, %xmm13
	vmovups	%xmm14, -16(%rsi)
	vmovdqa	%xmm3, %xmm14
	vmovdqu	32+8(%rsp), %xmm7
	jmp	.Loop6x

.L6x_done:
	vpxor	16+8(%rsp), %xmm8, %xmm8
	vpxor	%xmm4, %xmm8, %xmm8

	.byte	0xf3,0xc3			// rep ret
.size	_aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
|
|
/*
 * aesni_gcm_decrypt -- bulk AES-GCM decryption in units of six blocks.
 *
 * Arguments, as consumed by the code below (SysV AMD64 registers):
 *   %rdi  input buffer; it is both decrypted and hashed
 *   %rsi  output buffer
 *   %rdx  length in bytes
 *   %rcx  key schedule; the round count is read from 504(%rcx)
 *   %r8   16-byte counter block, updated in memory by the helper
 *   %r9   hash state: the 16 bytes at (%r9) are loaded, byte-swapped,
 *         updated and written back; the helper reads a table that
 *         starts at 32(%r9)
 * NOTE(review): the C prototype is not visible in this file; the argument
 * meaning above is inferred from register usage only.
 *
 * Returns in %rax the number of bytes processed, a multiple of 96.
 * Returns 0 without touching anything when the length is below 0x60;
 * handling of any remainder is presumably left to the caller.
 */
.globl	aesni_gcm_decrypt
.type	aesni_gcm_decrypt,@function
.p2align	5
aesni_gcm_decrypt:
.cfi_startproc
	xorq	%r10, %r10			// bytes processed so far
	cmpq	$0x60, %rdx
	jb	.Lgcm_dec_abort			// need at least 6 blocks

	leaq	(%rsp), %rax			// %rax anchors the frame
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
	vzeroupper

	vmovdqu	(%r8), %xmm1			// counter block
	addq	$-128, %rsp			// scratch area for the helper
	movl	12(%r8), %ebx			// last dword of the counter
	leaq	.Lbswap_mask(%rip), %r11
	leaq	-128(%rcx), %r14
	movq	$0xf80, %r15
	vmovdqu	(%r9), %xmm8			// current hash value
	andq	$-128, %rsp			// 128-byte align the scratch area
	vmovdqu	(%r11), %xmm0			// byte-swap mask
	leaq	128(%rcx), %rcx			// bias the key pointer by +128
	leaq	32+32(%r9), %r9			// bias the table pointer
	movl	504-128(%rcx), %ebp	// ICP has a larger offset for rounds.
	vpshufb	%xmm0, %xmm8, %xmm8

	/*
	 * Compare bits 7..11 of the stack address with those of the key
	 * address minus 128.  If the stack lies less than 768 bytes above
	 * it (modulo 4 KiB), move the stack down by that distance.  Going
	 * by the label name this avoids aliasing with the key schedule.
	 */
	andq	%r15, %r14
	andq	%rsp, %r15
	subq	%r14, %r15
	jc	.Ldec_no_key_aliasing
	cmpq	$0x300, %r15			// 768
	jnc	.Ldec_no_key_aliasing
	subq	%r15, %rsp
.Ldec_no_key_aliasing:

	/*
	 * Queue the first six input blocks, byte-swapped, for hashing:
	 * block 5 stays in %xmm7, blocks 4..0 go to 48..112(%rsp).
	 */
	vmovdqu	80(%rdi), %xmm7
	leaq	(%rdi), %r14			// hash input pointer = input
	vmovdqu	64(%rdi), %xmm4
	leaq	-192(%rdi,%rdx,1), %r15		// limit for %r14
	vmovdqu	48(%rdi), %xmm5
	shrq	$4, %rdx			// bytes -> 16-byte blocks
	xorq	%r10, %r10
	vmovdqu	32(%rdi), %xmm6
	vpshufb	%xmm0, %xmm7, %xmm7
	vmovdqu	16(%rdi), %xmm2
	vpshufb	%xmm0, %xmm4, %xmm4
	vmovdqu	(%rdi), %xmm3
	vpshufb	%xmm0, %xmm5, %xmm5
	vmovdqu	%xmm4, 48(%rsp)
	vpshufb	%xmm0, %xmm6, %xmm6
	vmovdqu	%xmm5, 64(%rsp)
	vpshufb	%xmm0, %xmm2, %xmm2
	vmovdqu	%xmm6, 80(%rsp)
	vpshufb	%xmm0, %xmm3, %xmm3
	vmovdqu	%xmm2, 96(%rsp)
	vmovdqu	%xmm3, 112(%rsp)

	call	_aesni_ctr32_ghash_6x

	/* The helper leaves its final six output blocks in registers. */
	vmovups	%xmm9, -96(%rsi)
	vmovups	%xmm10, -80(%rsi)
	vmovups	%xmm11, -64(%rsi)
	vmovups	%xmm12, -48(%rsi)
	vmovups	%xmm13, -32(%rsi)
	vmovups	%xmm14, -16(%rsi)

	vpshufb	(%r11), %xmm8, %xmm8		// swap the hash back
	vmovdqu	%xmm8, -64(%r9)			// -64 undoes the bias: original (%r9)

	vzeroupper
	movq	-48(%rax), %r15
.cfi_restore	%r15
	movq	-40(%rax), %r14
.cfi_restore	%r14
	movq	-32(%rax), %r13
.cfi_restore	%r13
	movq	-24(%rax), %r12
.cfi_restore	%r12
	movq	-16(%rax), %rbp
.cfi_restore	%rbp
	movq	-8(%rax), %rbx
.cfi_restore	%rbx
	leaq	(%rax), %rsp
.cfi_def_cfa_register	%rsp
.Lgcm_dec_abort:
	movq	%r10, %rax			// return bytes processed
	.byte	0xf3,0xc3			// rep ret
.cfi_endproc
.size	aesni_gcm_decrypt,.-aesni_gcm_decrypt
|
|
/*
 * _aesni_ctr32_6x -- internal helper: AES-CTR over six blocks, no hashing.
 *
 * Not callable from C; aesni_gcm_encrypt() sets up the registers.
 *   %rdi   source, advanced by 96
 *   %rsi   destination, advanced by 96 (all six blocks are stored here)
 *   %rcx   round keys, biased by +128
 *   %r11   .Lbswap_mask and the constants that follow it
 *   %ebx   dword loaded by the caller from 12(%r8) (tail of the counter)
 *   %ebp   round count (10, 12 or 14)
 *   %xmm0  byte-swap mask (only used on the .Lhandle_ctr32_2 path)
 *   %xmm1  counter block in, counter + 6 out
 * The six output blocks are also left in %xmm9-%xmm14.
 * Clobbers %r12, %r13, %xmm2-%xmm6, %xmm8 and %xmm15.
 */
.type	_aesni_ctr32_6x,@function
.p2align	5
_aesni_ctr32_6x:
	vmovdqu	0-128(%rcx), %xmm4		// round key 0
	vmovdqu	32(%r11), %xmm2			// .Lone_msb
	leaq	-2(%rbp), %r13	// ICP uses 10,12,14 not 9,11,13 for rounds.
	vmovups	16-128(%rcx), %xmm15		// round key 1
	leaq	32-128(%rcx), %r12		// -> round key 2
	vpxor	%xmm4, %xmm1, %xmm9
	addl	$0x06000000, %ebx		// 6 << 24: carry = counter byte 15 wraps
	jc	.Lhandle_ctr32_2
	vpaddb	%xmm2, %xmm1, %xmm10
	vpaddb	%xmm2, %xmm10, %xmm11
	vpxor	%xmm4, %xmm10, %xmm10
	vpaddb	%xmm2, %xmm11, %xmm12
	vpxor	%xmm4, %xmm11, %xmm11
	vpaddb	%xmm2, %xmm12, %xmm13
	vpxor	%xmm4, %xmm12, %xmm12
	vpaddb	%xmm2, %xmm13, %xmm14
	vpxor	%xmm4, %xmm13, %xmm13
	vpaddb	%xmm2, %xmm14, %xmm1		// counter for the next call
	vpxor	%xmm4, %xmm14, %xmm14
	jmp	.Loop_ctr32

	/* One AES round per pass; runs (rounds - 2) times. */
.p2align	4
.Loop_ctr32:
	vaesenc	%xmm15, %xmm9, %xmm9
	vaesenc	%xmm15, %xmm10, %xmm10
	vaesenc	%xmm15, %xmm11, %xmm11
	vaesenc	%xmm15, %xmm12, %xmm12
	vaesenc	%xmm15, %xmm13, %xmm13
	vaesenc	%xmm15, %xmm14, %xmm14
	vmovups	(%r12), %xmm15
	leaq	16(%r12), %r12
	decl	%r13d
	jnz	.Loop_ctr32

	/*
	 * Second-to-last round with %xmm15.  The last round key (%xmm3) is
	 * XORed with the source blocks first so that vaesenclast yields
	 * source ^ keystream directly.
	 */
	vmovdqu	(%r12), %xmm3
	vaesenc	%xmm15, %xmm9, %xmm9
	vpxor	0(%rdi), %xmm3, %xmm4
	vaesenc	%xmm15, %xmm10, %xmm10
	vpxor	16(%rdi), %xmm3, %xmm5
	vaesenc	%xmm15, %xmm11, %xmm11
	vpxor	32(%rdi), %xmm3, %xmm6
	vaesenc	%xmm15, %xmm12, %xmm12
	vpxor	48(%rdi), %xmm3, %xmm8
	vaesenc	%xmm15, %xmm13, %xmm13
	vpxor	64(%rdi), %xmm3, %xmm2
	vaesenc	%xmm15, %xmm14, %xmm14
	vpxor	80(%rdi), %xmm3, %xmm3
	leaq	96(%rdi), %rdi

	vaesenclast	%xmm4, %xmm9, %xmm9
	vaesenclast	%xmm5, %xmm10, %xmm10
	vaesenclast	%xmm6, %xmm11, %xmm11
	vaesenclast	%xmm8, %xmm12, %xmm12
	vaesenclast	%xmm2, %xmm13, %xmm13
	vaesenclast	%xmm3, %xmm14, %xmm14
	vmovups	%xmm9, 0(%rsi)
	vmovups	%xmm10, 16(%rsi)
	vmovups	%xmm11, 32(%rsi)
	vmovups	%xmm12, 48(%rsi)
	vmovups	%xmm13, 64(%rsi)
	vmovups	%xmm14, 80(%rsi)
	leaq	96(%rsi), %rsi

	.byte	0xf3,0xc3			// rep ret

	/*
	 * Slow path for the counter increment: byte-swap, add with 32-bit
	 * lanes (.Lone_lsb / .Ltwo_lsb), swap back, then whiten with round
	 * key 0 just like the fast path.
	 */
.p2align	5
.Lhandle_ctr32_2:
	vpshufb	%xmm0, %xmm1, %xmm6
	vmovdqu	48(%r11), %xmm5			// .Ltwo_lsb
	vpaddd	64(%r11), %xmm6, %xmm10		// .Lone_lsb
	vpaddd	%xmm5, %xmm6, %xmm11
	vpaddd	%xmm5, %xmm10, %xmm12
	vpshufb	%xmm0, %xmm10, %xmm10
	vpaddd	%xmm5, %xmm11, %xmm13
	vpshufb	%xmm0, %xmm11, %xmm11
	vpxor	%xmm4, %xmm10, %xmm10
	vpaddd	%xmm5, %xmm12, %xmm14
	vpshufb	%xmm0, %xmm12, %xmm12
	vpxor	%xmm4, %xmm11, %xmm11
	vpaddd	%xmm5, %xmm13, %xmm1
	vpshufb	%xmm0, %xmm13, %xmm13
	vpxor	%xmm4, %xmm12, %xmm12
	vpshufb	%xmm0, %xmm14, %xmm14
	vpxor	%xmm4, %xmm13, %xmm13
	vpshufb	%xmm0, %xmm1, %xmm1
	vpxor	%xmm4, %xmm14, %xmm14
	jmp	.Loop_ctr32
.size	_aesni_ctr32_6x,.-_aesni_ctr32_6x
|
|
|
|
/*
 * aesni_gcm_encrypt -- bulk AES-GCM encryption in units of six blocks.
 *
 * Arguments, as consumed by the code below (SysV AMD64 registers):
 *   %rdi  input buffer
 *   %rsi  output buffer; the data written here is what gets hashed
 *   %rdx  length in bytes
 *   %rcx  key schedule; the round count is read from 504(%rcx)
 *   %r8   16-byte counter block
 *   %r9   hash state: the 16 bytes at (%r9) are loaded, updated and
 *         written back; a table starting at 32(%r9) is read
 * NOTE(review): the C prototype is not visible in this file; the argument
 * meaning above is inferred from register usage only.
 *
 * Returns in %rax the number of bytes processed, a multiple of 96.
 * Returns 0 without touching anything when the length is below 288
 * (three groups of six blocks).
 *
 * Flow: two calls to _aesni_ctr32_6x encrypt twelve blocks up front,
 * _aesni_ctr32_ghash_6x then runs the stitched loop, and the straight-line
 * code after it folds the blocks that are still unhashed into the hash.
 */
.globl	aesni_gcm_encrypt
.type	aesni_gcm_encrypt,@function
.p2align	5
aesni_gcm_encrypt:
.cfi_startproc
	xorq	%r10, %r10			// bytes processed so far
	cmpq	$0x120, %rdx			// 288 = 3 * 96
	jb	.Lgcm_enc_abort

	leaq	(%rsp), %rax			// %rax anchors the frame
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
	vzeroupper

	vmovdqu	(%r8), %xmm1			// counter block
	addq	$-128, %rsp			// scratch area for the helpers
	movl	12(%r8), %ebx			// last dword of the counter
	leaq	.Lbswap_mask(%rip), %r11
	leaq	-128(%rcx), %r14
	movq	$0xf80, %r15
	leaq	128(%rcx), %rcx			// bias the key pointer by +128
	vmovdqu	(%r11), %xmm0			// byte-swap mask
	andq	$-128, %rsp			// 128-byte align the scratch area
	movl	504-128(%rcx), %ebp	// ICP has a larger offset for rounds.

	/*
	 * Compare bits 7..11 of the stack address with those of the key
	 * address minus 128.  If the stack lies less than 768 bytes above
	 * it (modulo 4 KiB), move the stack down by that distance.  Going
	 * by the label name this avoids aliasing with the key schedule.
	 */
	andq	%r15, %r14
	andq	%rsp, %r15
	subq	%r14, %r15
	jc	.Lenc_no_key_aliasing
	cmpq	$0x300, %r15			// 768
	jnc	.Lenc_no_key_aliasing
	subq	%r15, %rsp
.Lenc_no_key_aliasing:

	leaq	(%rsi), %r14			// hash input pointer = output
	leaq	-192(%rsi,%rdx,1), %r15		// limit for %r14
	shrq	$4, %rdx			// bytes -> 16-byte blocks

	/*
	 * First six blocks: encrypt, then queue the byte-swapped results
	 * for hashing (block 5 in %xmm7, blocks 4..0 at 48..112(%rsp)).
	 */
	call	_aesni_ctr32_6x
	vpshufb	%xmm0, %xmm9, %xmm8
	vpshufb	%xmm0, %xmm10, %xmm2
	vmovdqu	%xmm8, 112(%rsp)
	vpshufb	%xmm0, %xmm11, %xmm4
	vmovdqu	%xmm2, 96(%rsp)
	vpshufb	%xmm0, %xmm12, %xmm5
	vmovdqu	%xmm4, 80(%rsp)
	vpshufb	%xmm0, %xmm13, %xmm6
	vmovdqu	%xmm5, 64(%rsp)
	vpshufb	%xmm0, %xmm14, %xmm7
	vmovdqu	%xmm6, 48(%rsp)

	call	_aesni_ctr32_6x			// second group of six blocks

	vmovdqu	(%r9), %xmm8			// current hash value
	leaq	32+32(%r9), %r9			// bias the table pointer
	subq	$12, %rdx			// twelve blocks already encrypted
	movq	$0xc0, %r10			// ... which is 192 bytes
	vpshufb	%xmm0, %xmm8, %xmm8

	call	_aesni_ctr32_ghash_6x

	/*
	 * Tail: store the helper's last six output blocks and hash what is
	 * still pending.  The first batch comes from %xmm7 (reloaded from
	 * 32(%rsp)) and 48..112(%rsp); the second batch is the byte-swapped
	 * contents of %xmm9-%xmm14.
	 */
	vmovdqu	32(%rsp), %xmm7
	vmovdqu	(%r11), %xmm0			// byte-swap mask
	vmovdqu	0-32(%r9), %xmm3
	vpunpckhqdq	%xmm7, %xmm7, %xmm1
	vmovdqu	32-32(%r9), %xmm15
	vmovups	%xmm9, -96(%rsi)
	vpshufb	%xmm0, %xmm9, %xmm9
	vpxor	%xmm7, %xmm1, %xmm1
	vmovups	%xmm10, -80(%rsi)
	vpshufb	%xmm0, %xmm10, %xmm10
	vmovups	%xmm11, -64(%rsi)
	vpshufb	%xmm0, %xmm11, %xmm11
	vmovups	%xmm12, -48(%rsi)
	vpshufb	%xmm0, %xmm12, %xmm12
	vmovups	%xmm13, -32(%rsi)
	vpshufb	%xmm0, %xmm13, %xmm13
	vmovups	%xmm14, -16(%rsi)
	vpshufb	%xmm0, %xmm14, %xmm14
	vmovdqu	%xmm9, 16(%rsp)
	vmovdqu	48(%rsp), %xmm6
	vmovdqu	16-32(%r9), %xmm0
	vpunpckhqdq	%xmm6, %xmm6, %xmm2
	vpclmulqdq	$0x00, %xmm3, %xmm7, %xmm5
	vpxor	%xmm6, %xmm2, %xmm2
	vpclmulqdq	$0x11, %xmm3, %xmm7, %xmm7
	vpclmulqdq	$0x00, %xmm15, %xmm1, %xmm1

	vmovdqu	64(%rsp), %xmm9
	vpclmulqdq	$0x00, %xmm0, %xmm6, %xmm4
	vmovdqu	48-32(%r9), %xmm3
	vpxor	%xmm5, %xmm4, %xmm4
	vpunpckhqdq	%xmm9, %xmm9, %xmm5
	vpclmulqdq	$0x11, %xmm0, %xmm6, %xmm6
	vpxor	%xmm9, %xmm5, %xmm5
	vpxor	%xmm7, %xmm6, %xmm6
	vpclmulqdq	$0x10, %xmm15, %xmm2, %xmm2
	vmovdqu	80-32(%r9), %xmm15
	vpxor	%xmm1, %xmm2, %xmm2

	vmovdqu	80(%rsp), %xmm1
	vpclmulqdq	$0x00, %xmm3, %xmm9, %xmm7
	vmovdqu	64-32(%r9), %xmm0
	vpxor	%xmm4, %xmm7, %xmm7
	vpunpckhqdq	%xmm1, %xmm1, %xmm4
	vpclmulqdq	$0x11, %xmm3, %xmm9, %xmm9
	vpxor	%xmm1, %xmm4, %xmm4
	vpxor	%xmm6, %xmm9, %xmm9
	vpclmulqdq	$0x00, %xmm15, %xmm5, %xmm5
	vpxor	%xmm2, %xmm5, %xmm5

	vmovdqu	96(%rsp), %xmm2
	vpclmulqdq	$0x00, %xmm0, %xmm1, %xmm6
	vmovdqu	96-32(%r9), %xmm3
	vpxor	%xmm7, %xmm6, %xmm6
	vpunpckhqdq	%xmm2, %xmm2, %xmm7
	vpclmulqdq	$0x11, %xmm0, %xmm1, %xmm1
	vpxor	%xmm2, %xmm7, %xmm7
	vpxor	%xmm9, %xmm1, %xmm1
	vpclmulqdq	$0x10, %xmm15, %xmm4, %xmm4
	vmovdqu	128-32(%r9), %xmm15
	vpxor	%xmm5, %xmm4, %xmm4

	vpxor	112(%rsp), %xmm8, %xmm8		// fold the accumulator into the oldest block
	vpclmulqdq	$0x00, %xmm3, %xmm2, %xmm5
	vmovdqu	112-32(%r9), %xmm0
	vpunpckhqdq	%xmm8, %xmm8, %xmm9
	vpxor	%xmm6, %xmm5, %xmm5
	vpclmulqdq	$0x11, %xmm3, %xmm2, %xmm2
	vpxor	%xmm8, %xmm9, %xmm9
	vpxor	%xmm1, %xmm2, %xmm2
	vpclmulqdq	$0x00, %xmm15, %xmm7, %xmm7
	vpxor	%xmm4, %xmm7, %xmm4

	vpclmulqdq	$0x00, %xmm0, %xmm8, %xmm6
	vmovdqu	0-32(%r9), %xmm3
	vpunpckhqdq	%xmm14, %xmm14, %xmm1
	vpclmulqdq	$0x11, %xmm0, %xmm8, %xmm8
	vpxor	%xmm14, %xmm1, %xmm1
	vpxor	%xmm5, %xmm6, %xmm5
	vpclmulqdq	$0x10, %xmm15, %xmm9, %xmm9
	vmovdqu	32-32(%r9), %xmm15
	vpxor	%xmm2, %xmm8, %xmm7
	vpxor	%xmm4, %xmm9, %xmm6

	vmovdqu	16-32(%r9), %xmm0
	vpxor	%xmm5, %xmm7, %xmm9
	vpclmulqdq	$0x00, %xmm3, %xmm14, %xmm4
	vpxor	%xmm9, %xmm6, %xmm6
	vpunpckhqdq	%xmm13, %xmm13, %xmm2
	vpclmulqdq	$0x11, %xmm3, %xmm14, %xmm14
	vpxor	%xmm13, %xmm2, %xmm2
	vpslldq	$8, %xmm6, %xmm9
	vpclmulqdq	$0x00, %xmm15, %xmm1, %xmm1
	vpxor	%xmm9, %xmm5, %xmm8
	vpsrldq	$8, %xmm6, %xmm6
	vpxor	%xmm6, %xmm7, %xmm7

	vpclmulqdq	$0x00, %xmm0, %xmm13, %xmm5
	vmovdqu	48-32(%r9), %xmm3
	vpxor	%xmm4, %xmm5, %xmm5
	vpunpckhqdq	%xmm12, %xmm12, %xmm9
	vpclmulqdq	$0x11, %xmm0, %xmm13, %xmm13
	vpxor	%xmm12, %xmm9, %xmm9
	vpxor	%xmm14, %xmm13, %xmm13
	vpalignr	$8, %xmm8, %xmm8, %xmm14
	vpclmulqdq	$0x10, %xmm15, %xmm2, %xmm2
	vmovdqu	80-32(%r9), %xmm15
	vpxor	%xmm1, %xmm2, %xmm2

	vpclmulqdq	$0x00, %xmm3, %xmm12, %xmm4
	vmovdqu	64-32(%r9), %xmm0
	vpxor	%xmm5, %xmm4, %xmm4
	vpunpckhqdq	%xmm11, %xmm11, %xmm1
	vpclmulqdq	$0x11, %xmm3, %xmm12, %xmm12
	vpxor	%xmm11, %xmm1, %xmm1
	vpxor	%xmm13, %xmm12, %xmm12
	vxorps	16(%rsp), %xmm7, %xmm7
	vpclmulqdq	$0x00, %xmm15, %xmm9, %xmm9
	vpxor	%xmm2, %xmm9, %xmm9

	vpclmulqdq	$0x10, 16(%r11), %xmm8, %xmm8	// reduction step with .Lpoly
	vxorps	%xmm14, %xmm8, %xmm8

	vpclmulqdq	$0x00, %xmm0, %xmm11, %xmm5
	vmovdqu	96-32(%r9), %xmm3
	vpxor	%xmm4, %xmm5, %xmm5
	vpunpckhqdq	%xmm10, %xmm10, %xmm2
	vpclmulqdq	$0x11, %xmm0, %xmm11, %xmm11
	vpxor	%xmm10, %xmm2, %xmm2
	vpalignr	$8, %xmm8, %xmm8, %xmm14
	vpxor	%xmm12, %xmm11, %xmm11
	vpclmulqdq	$0x10, %xmm15, %xmm1, %xmm1
	vmovdqu	128-32(%r9), %xmm15
	vpxor	%xmm9, %xmm1, %xmm1

	vxorps	%xmm7, %xmm14, %xmm14
	vpclmulqdq	$0x10, 16(%r11), %xmm8, %xmm8	// reduction step with .Lpoly
	vxorps	%xmm14, %xmm8, %xmm8

	vpclmulqdq	$0x00, %xmm3, %xmm10, %xmm4
	vmovdqu	112-32(%r9), %xmm0
	vpxor	%xmm5, %xmm4, %xmm4
	vpunpckhqdq	%xmm8, %xmm8, %xmm9
	vpclmulqdq	$0x11, %xmm3, %xmm10, %xmm10
	vpxor	%xmm8, %xmm9, %xmm9
	vpxor	%xmm11, %xmm10, %xmm10
	vpclmulqdq	$0x00, %xmm15, %xmm2, %xmm2
	vpxor	%xmm1, %xmm2, %xmm2

	vpclmulqdq	$0x00, %xmm0, %xmm8, %xmm5
	vpclmulqdq	$0x11, %xmm0, %xmm8, %xmm7
	vpxor	%xmm4, %xmm5, %xmm5
	vpclmulqdq	$0x10, %xmm15, %xmm9, %xmm6
	vpxor	%xmm10, %xmm7, %xmm7
	vpxor	%xmm2, %xmm6, %xmm6

	vpxor	%xmm5, %xmm7, %xmm4
	vpxor	%xmm4, %xmm6, %xmm6
	vpslldq	$8, %xmm6, %xmm1
	vmovdqu	16(%r11), %xmm3			// .Lpoly
	vpsrldq	$8, %xmm6, %xmm6
	vpxor	%xmm1, %xmm5, %xmm8
	vpxor	%xmm6, %xmm7, %xmm7

	vpalignr	$8, %xmm8, %xmm8, %xmm2
	vpclmulqdq	$0x10, %xmm3, %xmm8, %xmm8
	vpxor	%xmm2, %xmm8, %xmm8

	vpalignr	$8, %xmm8, %xmm8, %xmm2
	vpclmulqdq	$0x10, %xmm3, %xmm8, %xmm8
	vpxor	%xmm7, %xmm2, %xmm2
	vpxor	%xmm2, %xmm8, %xmm8
	vpshufb	(%r11), %xmm8, %xmm8		// swap the hash back
	vmovdqu	%xmm8, -64(%r9)			// -64 undoes the bias: original (%r9)

	vzeroupper
	movq	-48(%rax), %r15
.cfi_restore	%r15
	movq	-40(%rax), %r14
.cfi_restore	%r14
	movq	-32(%rax), %r13
.cfi_restore	%r13
	movq	-24(%rax), %r12
.cfi_restore	%r12
	movq	-16(%rax), %rbp
.cfi_restore	%rbp
	movq	-8(%rax), %rbx
.cfi_restore	%rbx
	leaq	(%rax), %rsp
.cfi_def_cfa_register	%rsp
.Lgcm_enc_abort:
	movq	%r10, %rax			// return bytes processed
	.byte	0xf3,0xc3			// rep ret
.cfi_endproc
.size	aesni_gcm_encrypt,.-aesni_gcm_encrypt
|
|
|
|
/* Some utility routines */
|
|
|
|
/*
 * void clear_fpu_regs_avx(void);
 *
 * Wipe the vector register file with a single vzeroall so that no data
 * is left behind in the xmm/ymm registers.  Takes no arguments and
 * returns nothing.
 */
.globl	clear_fpu_regs_avx
.type	clear_fpu_regs_avx,@function
.p2align	5
clear_fpu_regs_avx:
	vzeroall
	ret
.size	clear_fpu_regs_avx,.-clear_fpu_regs_avx
|
|
|
|
/*
 * void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
 *
 * XORs one pair of unaligned 128-bit blocks from `src' and `dst' and
 * stores the result at `dst'. The XOR is performed using FPU registers,
 * so make sure FPU state is saved when running this in the kernel.
 *
 * In:    %rdi = src, %rsi = dst (neither needs to be aligned)
 * Clobb: %xmm0 (its upper ymm bits are zeroed by the VEX encoding)
 *
 * VEX-encoded instructions are used, like everywhere else in this file,
 * so that no legacy-SSE instruction runs between AVX code; the file is
 * only built when AVX is available. VEX memory operands carry no
 * alignment requirement, so the second block can be XORed straight
 * from memory.
 */
.globl	gcm_xor_avx
.type	gcm_xor_avx,@function
.align	32
gcm_xor_avx:
	vmovdqu	(%rdi), %xmm0
	vpxor	(%rsi), %xmm0, %xmm0
	vmovdqu	%xmm0, (%rsi)
	ret
.size	gcm_xor_avx,.-gcm_xor_avx
|
|
|
|
/*
 * boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
 *
 * Atomically flips bit 0 of the 32-bit value at (%rdi) with a locked
 * XOR and reports the new value: %eax is 1 when the result is non-zero
 * and 0 when it is zero.
 */
.globl	atomic_toggle_boolean_nv
.type	atomic_toggle_boolean_nv,@function
.p2align	5
atomic_toggle_boolean_nv:
	xorl	%eax, %eax		// result defaults to 0
	lock xorl $1, (%rdi)		// ZF reflects the new value
	setnz	%al			// 1 if the new value is non-zero
	ret
.size	atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv
|
|
|
|
/*
 * Constant table.  The routines above load the address of .Lbswap_mask
 * into %r11 and reach the other entries by fixed displacement, so the
 * order and the 16-byte size of every entry must not change.
 */
.p2align	6
.Lbswap_mask:				/* (%r11): vpshufb mask reversing all 16 bytes */
	.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:					/* 16(%r11): constant for the vpclmulqdq reduction steps */
	.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:				/* 32(%r11): vpaddb increment, +1 in byte 15 */
	.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:				/* 48(%r11): vpaddd increment, +2 in byte 0 */
	.byte	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:				/* 64(%r11): vpaddd increment, +1 in byte 0 */
	.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	/* NUL-terminated identification string. */
	.asciz	"AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.p2align	6
|
|
|
|
/* Mark the stack non-executable. */
|
|
#if defined(__linux__) && defined(__ELF__)
|
|
.section .note.GNU-stack,"",%progbits
|
|
#endif
|
|
|
|
#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... */
|