mirror_zfs/module/icp/asm-x86_64/aes/aes_intel.S
Tom Caputi a73c94934f Change movaps to movups in AES-NI code
Currently, the ICP contains accelerated assembly code to be
used specifically on CPUs with AES-NI enabled. This code
makes heavy use of the movaps instruction which assumes that
it will be provided aes keys that are 16 byte aligned. This
assumption seems to hold on Illumos, but on Linux some kernel
options such as 'slub_debug=P' will violate it. This patch
changes all instances of this instruction to movups which is
the same except that it can handle unaligned memory.

This patch also adds a few flags which were accidentally never
given to the assembly compiler, resulting in objtool warnings.

Reviewed by: Gvozden Neskovic <neskovic@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Nathaniel R. Lewis <linux.robotdude@gmail.com>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7065 
Closes #7108
2018-01-31 15:17:56 -08:00

750 lines
22 KiB
ArmAsm

/*
* ====================================================================
* Written by Intel Corporation for the OpenSSL project to add support
* for Intel AES-NI instructions. Rights for redistribution and usage
* in source and binary forms are granted according to the OpenSSL
* license.
*
* Author: Huang Ying <ying.huang at intel dot com>
* Vinodh Gopal <vinodh.gopal at intel dot com>
* Kahraman Akdemir
*
* Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
* instructions that are going to be introduced in the next generation
* of Intel processor, as of 2009. These instructions enable fast and
* secure data encryption and decryption, using the Advanced Encryption
* Standard (AES), defined by FIPS Publication number 197. The
* architecture introduces six instructions that offer full hardware
* support for AES. Four of them support high performance data
* encryption and decryption, and the other two instructions support
* the AES key expansion procedure.
* ====================================================================
*/
/*
* ====================================================================
* Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*/
/*
* ====================================================================
* OpenSolaris OS modifications
*
* This source originates as files aes-intel.S and eng_aesni_asm.pl, in
* patches sent sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
* Huang Ying of Intel to the openssl-dev mailing list under the subject
* of "Add support to Intel AES-NI instruction set for x86_64 platform".
*
* This OpenSolaris version has these major changes from the original source:
*
* 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
* /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
* definitions for lint.
*
* 2. Formatted code, added comments, and added #includes and #defines.
*
* 3. If bit CR0.TS is set, clear and set the TS bit, after and before
* calling kpreempt_disable() and kpreempt_enable().
* If the TS bit is not set, Save and restore %xmm registers at the beginning
* and end of function calls (%xmm* registers are not saved and restored by
* during kernel thread preemption).
*
* 4. Renamed functions, reordered parameters, and changed return value
* to match OpenSolaris:
*
* OpenSSL interface:
* int intel_AES_set_encrypt_key(const unsigned char *userKey,
* const int bits, AES_KEY *key);
* int intel_AES_set_decrypt_key(const unsigned char *userKey,
* const int bits, AES_KEY *key);
* Return values for above are non-zero on error, 0 on success.
*
* void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
* const AES_KEY *key);
* void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
* const AES_KEY *key);
* typedef struct aes_key_st {
* unsigned int rd_key[4 *(AES_MAXNR + 1)];
* int rounds;
* unsigned int pad[3];
* } AES_KEY;
* Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
* (ks32) instead of 64-bit (ks64).
* Number of rounds (aka round count) is at offset 240 of AES_KEY.
*
* OpenSolaris OS interface (#ifdefs removed for readability):
* int rijndael_key_setup_dec_intel(uint32_t rk[],
* const uint32_t cipherKey[], uint64_t keyBits);
* int rijndael_key_setup_enc_intel(uint32_t rk[],
* const uint32_t cipherKey[], uint64_t keyBits);
* Return values for above are 0 on error, number of rounds on success.
*
* void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
* const uint32_t pt[4], uint32_t ct[4]);
* void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
* const uint32_t pt[4], uint32_t ct[4]);
* typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
* uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
*
* typedef union {
* uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
* } aes_ks_t;
* typedef struct aes_key {
* aes_ks_t encr_ks, decr_ks;
* long double align128;
* int flags, nr, type;
* } aes_key_t;
*
* Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
* ct is crypto text, and MAX_AES_NR is 14.
* For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
*
* Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
*
* ====================================================================
*/
#if defined(lint) || defined(__lint)
#include <sys/types.h>
/* ARGSUSED */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
uint32_t ct[4]) {
}
/* ARGSUSED */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
uint32_t pt[4]) {
}
/* ARGSUSED */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
uint64_t keyBits) {
return (0);
}
/* ARGSUSED */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
uint64_t keyBits) {
return (0);
}
#else /* lint */
#define _ASM
#include <sys/asm_linkage.h>
/*
* _key_expansion_128(), * _key_expansion_192a(), _key_expansion_192b(),
* _key_expansion_256a(), _key_expansion_256b()
*
* Helper functions called by rijndael_key_setup_inc_intel().
* Also used indirectly by rijndael_key_setup_dec_intel().
*
* Input:
* %xmm0 User-provided cipher key
* %xmm1 Round constant
* Output:
* (%rcx) AES key
*/
ENTRY_NP2(_key_expansion_128, _key_expansion_256a)
_key_expansion_128_local:
_key_expansion_256a_local:
pshufd $0b11111111, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
shufps $0b10001100, %xmm0, %xmm4
pxor %xmm4, %xmm0
pxor %xmm1, %xmm0
movups %xmm0, (%rcx)
add $0x10, %rcx
ret
nop
SET_SIZE(_key_expansion_128)
SET_SIZE(_key_expansion_256a)
ENTRY_NP(_key_expansion_192a)
_key_expansion_192a_local:
pshufd $0b01010101, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
shufps $0b10001100, %xmm0, %xmm4
pxor %xmm4, %xmm0
pxor %xmm1, %xmm0
movups %xmm2, %xmm5
movups %xmm2, %xmm6
pslldq $4, %xmm5
pshufd $0b11111111, %xmm0, %xmm3
pxor %xmm3, %xmm2
pxor %xmm5, %xmm2
movups %xmm0, %xmm1
shufps $0b01000100, %xmm0, %xmm6
movups %xmm6, (%rcx)
shufps $0b01001110, %xmm2, %xmm1
movups %xmm1, 0x10(%rcx)
add $0x20, %rcx
ret
SET_SIZE(_key_expansion_192a)
ENTRY_NP(_key_expansion_192b)
_key_expansion_192b_local:
pshufd $0b01010101, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
shufps $0b10001100, %xmm0, %xmm4
pxor %xmm4, %xmm0
pxor %xmm1, %xmm0
movups %xmm2, %xmm5
pslldq $4, %xmm5
pshufd $0b11111111, %xmm0, %xmm3
pxor %xmm3, %xmm2
pxor %xmm5, %xmm2
movups %xmm0, (%rcx)
add $0x10, %rcx
ret
SET_SIZE(_key_expansion_192b)
ENTRY_NP(_key_expansion_256b)
_key_expansion_256b_local:
pshufd $0b10101010, %xmm1, %xmm1
shufps $0b00010000, %xmm2, %xmm4
pxor %xmm4, %xmm2
shufps $0b10001100, %xmm2, %xmm4
pxor %xmm4, %xmm2
pxor %xmm1, %xmm2
movups %xmm2, (%rcx)
add $0x10, %rcx
ret
SET_SIZE(_key_expansion_256b)
/*
* rijndael_key_setup_enc_intel()
* Expand the cipher key into the encryption key schedule.
*
* For kernel code, caller is responsible for ensuring kpreempt_disable()
* has been called. This is because %xmm registers are not saved/restored.
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
* on the stack.
*
* OpenSolaris interface:
* int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
* uint64_t keyBits);
* Return value is 0 on error, number of rounds on success.
*
* Original Intel OpenSSL interface:
* int intel_AES_set_encrypt_key(const unsigned char *userKey,
* const int bits, AES_KEY *key);
* Return value is non-zero on error, 0 on success.
*/
#ifdef OPENSSL_INTERFACE
#define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key
#define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key
#define USERCIPHERKEY rdi /* P1, 64 bits */
#define KEYSIZE32 esi /* P2, 32 bits */
#define KEYSIZE64 rsi /* P2, 64 bits */
#define AESKEY rdx /* P3, 64 bits */
#else /* OpenSolaris Interface */
#define AESKEY rdi /* P1, 64 bits */
#define USERCIPHERKEY rsi /* P2, 64 bits */
#define KEYSIZE32 edx /* P3, 32 bits */
#define KEYSIZE64 rdx /* P3, 64 bits */
#endif /* OPENSSL_INTERFACE */
#define ROUNDS32 KEYSIZE32 /* temp */
#define ROUNDS64 KEYSIZE64 /* temp */
#define ENDAESKEY USERCIPHERKEY /* temp */
ENTRY_NP(rijndael_key_setup_enc_intel)
rijndael_key_setup_enc_intel_local:
FRAME_BEGIN
// NULL pointer sanity check
test %USERCIPHERKEY, %USERCIPHERKEY
jz .Lenc_key_invalid_param
test %AESKEY, %AESKEY
jz .Lenc_key_invalid_param
movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes)
movups %xmm0, (%AESKEY)
lea 0x10(%AESKEY), %rcx // key addr
pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x
cmp $256, %KEYSIZE32
jnz .Lenc_key192
// AES 256: 14 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
mov $14, %ROUNDS32
movl %ROUNDS32, 240(%AESKEY) // key.rounds = 14
#endif /* OPENSSL_INTERFACE */
movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes)
movups %xmm2, (%rcx)
add $0x10, %rcx
aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key
call _key_expansion_256a_local
aeskeygenassist $0x1, %xmm0, %xmm1
call _key_expansion_256b_local
aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key
call _key_expansion_256a_local
aeskeygenassist $0x2, %xmm0, %xmm1
call _key_expansion_256b_local
aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key
call _key_expansion_256a_local
aeskeygenassist $0x4, %xmm0, %xmm1
call _key_expansion_256b_local
aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key
call _key_expansion_256a_local
aeskeygenassist $0x8, %xmm0, %xmm1
call _key_expansion_256b_local
aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key
call _key_expansion_256a_local
aeskeygenassist $0x10, %xmm0, %xmm1
call _key_expansion_256b_local
aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key
call _key_expansion_256a_local
aeskeygenassist $0x20, %xmm0, %xmm1
call _key_expansion_256b_local
aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key
call _key_expansion_256a_local
#ifdef OPENSSL_INTERFACE
xor %rax, %rax // return 0 (OK)
#else /* Open Solaris Interface */
mov $14, %rax // return # rounds = 14
#endif
FRAME_END
ret
.align 4
.Lenc_key192:
cmp $192, %KEYSIZE32
jnz .Lenc_key128
// AES 192: 12 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
mov $12, %ROUNDS32
movl %ROUNDS32, 240(%AESKEY) // key.rounds = 12
#endif /* OPENSSL_INTERFACE */
movq 0x10(%USERCIPHERKEY), %xmm2 // other user key
aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key
call _key_expansion_192a_local
aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key
call _key_expansion_192b_local
aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key
call _key_expansion_192a_local
aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key
call _key_expansion_192b_local
aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key
call _key_expansion_192a_local
aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key
call _key_expansion_192b_local
aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key
call _key_expansion_192a_local
aeskeygenassist $0x80, %xmm2, %xmm1 // expand the key
call _key_expansion_192b_local
#ifdef OPENSSL_INTERFACE
xor %rax, %rax // return 0 (OK)
#else /* OpenSolaris Interface */
mov $12, %rax // return # rounds = 12
#endif
FRAME_END
ret
.align 4
.Lenc_key128:
cmp $128, %KEYSIZE32
jnz .Lenc_key_invalid_key_bits
// AES 128: 10 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
mov $10, %ROUNDS32
movl %ROUNDS32, 240(%AESKEY) // key.rounds = 10
#endif /* OPENSSL_INTERFACE */
aeskeygenassist $0x1, %xmm0, %xmm1 // expand the key
call _key_expansion_128_local
aeskeygenassist $0x2, %xmm0, %xmm1 // expand the key
call _key_expansion_128_local
aeskeygenassist $0x4, %xmm0, %xmm1 // expand the key
call _key_expansion_128_local
aeskeygenassist $0x8, %xmm0, %xmm1 // expand the key
call _key_expansion_128_local
aeskeygenassist $0x10, %xmm0, %xmm1 // expand the key
call _key_expansion_128_local
aeskeygenassist $0x20, %xmm0, %xmm1 // expand the key
call _key_expansion_128_local
aeskeygenassist $0x40, %xmm0, %xmm1 // expand the key
call _key_expansion_128_local
aeskeygenassist $0x80, %xmm0, %xmm1 // expand the key
call _key_expansion_128_local
aeskeygenassist $0x1b, %xmm0, %xmm1 // expand the key
call _key_expansion_128_local
aeskeygenassist $0x36, %xmm0, %xmm1 // expand the key
call _key_expansion_128_local
#ifdef OPENSSL_INTERFACE
xor %rax, %rax // return 0 (OK)
#else /* OpenSolaris Interface */
mov $10, %rax // return # rounds = 10
#endif
FRAME_END
ret
.Lenc_key_invalid_param:
#ifdef OPENSSL_INTERFACE
mov $-1, %rax // user key or AES key pointer is NULL
FRAME_END
ret
#else
/* FALLTHROUGH */
#endif /* OPENSSL_INTERFACE */
.Lenc_key_invalid_key_bits:
#ifdef OPENSSL_INTERFACE
mov $-2, %rax // keysize is invalid
#else /* Open Solaris Interface */
xor %rax, %rax // a key pointer is NULL or invalid keysize
#endif /* OPENSSL_INTERFACE */
FRAME_END
ret
SET_SIZE(rijndael_key_setup_enc_intel)
/*
* rijndael_key_setup_dec_intel()
* Expand the cipher key into the decryption key schedule.
*
* For kernel code, caller is responsible for ensuring kpreempt_disable()
* has been called. This is because %xmm registers are not saved/restored.
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
* on the stack.
*
* OpenSolaris interface:
* int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
* uint64_t keyBits);
* Return value is 0 on error, number of rounds on success.
* P1->P2, P2->P3, P3->P1
*
* Original Intel OpenSSL interface:
* int intel_AES_set_decrypt_key(const unsigned char *userKey,
* const int bits, AES_KEY *key);
* Return value is non-zero on error, 0 on success.
*/
ENTRY_NP(rijndael_key_setup_dec_intel)
FRAME_BEGIN
// Generate round keys used for encryption
call rijndael_key_setup_enc_intel_local
test %rax, %rax
#ifdef OPENSSL_INTERFACE
jnz .Ldec_key_exit // Failed if returned non-0
#else /* OpenSolaris Interface */
jz .Ldec_key_exit // Failed if returned 0
#endif /* OPENSSL_INTERFACE */
/*
* Convert round keys used for encryption
* to a form usable for decryption
*/
#ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */
mov %rax, %ROUNDS64 // set # rounds (10, 12, or 14)
// (already set for OpenSSL)
#endif
lea 0x10(%AESKEY), %rcx // key addr
shl $4, %ROUNDS32
add %AESKEY, %ROUNDS64
mov %ROUNDS64, %ENDAESKEY
.align 4
.Ldec_key_reorder_loop:
movups (%AESKEY), %xmm0
movups (%ROUNDS64), %xmm1
movups %xmm0, (%ROUNDS64)
movups %xmm1, (%AESKEY)
lea 0x10(%AESKEY), %AESKEY
lea -0x10(%ROUNDS64), %ROUNDS64
cmp %AESKEY, %ROUNDS64
ja .Ldec_key_reorder_loop
.align 4
.Ldec_key_inv_loop:
movups (%rcx), %xmm0
// Convert an encryption round key to a form usable for decryption
// with the "AES Inverse Mix Columns" instruction
aesimc %xmm0, %xmm1
movups %xmm1, (%rcx)
lea 0x10(%rcx), %rcx
cmp %ENDAESKEY, %rcx
jnz .Ldec_key_inv_loop
.Ldec_key_exit:
// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
// OpenSSL: rax = 0 for OK, or non-zero for error
FRAME_END
ret
SET_SIZE(rijndael_key_setup_dec_intel)
/*
* aes_encrypt_intel()
* Encrypt a single block (in and out can overlap).
*
* For kernel code, caller is responsible for ensuring kpreempt_disable()
* has been called. This is because %xmm registers are not saved/restored.
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
* on the stack.
*
* Temporary register usage:
* %xmm0 State
* %xmm1 Key
*
* Original OpenSolaris Interface:
* void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
* const uint32_t pt[4], uint32_t ct[4])
*
* Original Intel OpenSSL Interface:
* void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
* const AES_KEY *key)
*/
#ifdef OPENSSL_INTERFACE
#define aes_encrypt_intel intel_AES_encrypt
#define aes_decrypt_intel intel_AES_decrypt
#define INP rdi /* P1, 64 bits */
#define OUTP rsi /* P2, 64 bits */
#define KEYP rdx /* P3, 64 bits */
/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */
#define NROUNDS32 ecx /* temporary, 32 bits */
#define NROUNDS cl /* temporary, 8 bits */
#else /* OpenSolaris Interface */
#define KEYP rdi /* P1, 64 bits */
#define NROUNDS esi /* P2, 32 bits */
#define INP rdx /* P3, 64 bits */
#define OUTP rcx /* P4, 64 bits */
#endif /* OPENSSL_INTERFACE */
#define STATE xmm0 /* temporary, 128 bits */
#define KEY xmm1 /* temporary, 128 bits */
ENTRY_NP(aes_encrypt_intel)
movups (%INP), %STATE // input
movups (%KEYP), %KEY // key
#ifdef OPENSSL_INTERFACE
mov 240(%KEYP), %NROUNDS32 // round count
#else /* OpenSolaris Interface */
/* Round count is already present as P2 in %rsi/%esi */
#endif /* OPENSSL_INTERFACE */
pxor %KEY, %STATE // round 0
lea 0x30(%KEYP), %KEYP
cmp $12, %NROUNDS
jb .Lenc128
lea 0x20(%KEYP), %KEYP
je .Lenc192
// AES 256
lea 0x20(%KEYP), %KEYP
movups -0x60(%KEYP), %KEY
aesenc %KEY, %STATE
movups -0x50(%KEYP), %KEY
aesenc %KEY, %STATE
.align 4
.Lenc192:
// AES 192 and 256
movups -0x40(%KEYP), %KEY
aesenc %KEY, %STATE
movups -0x30(%KEYP), %KEY
aesenc %KEY, %STATE
.align 4
.Lenc128:
// AES 128, 192, and 256
movups -0x20(%KEYP), %KEY
aesenc %KEY, %STATE
movups -0x10(%KEYP), %KEY
aesenc %KEY, %STATE
movups (%KEYP), %KEY
aesenc %KEY, %STATE
movups 0x10(%KEYP), %KEY
aesenc %KEY, %STATE
movups 0x20(%KEYP), %KEY
aesenc %KEY, %STATE
movups 0x30(%KEYP), %KEY
aesenc %KEY, %STATE
movups 0x40(%KEYP), %KEY
aesenc %KEY, %STATE
movups 0x50(%KEYP), %KEY
aesenc %KEY, %STATE
movups 0x60(%KEYP), %KEY
aesenc %KEY, %STATE
movups 0x70(%KEYP), %KEY
aesenclast %KEY, %STATE // last round
movups %STATE, (%OUTP) // output
ret
SET_SIZE(aes_encrypt_intel)
/*
* aes_decrypt_intel()
* Decrypt a single block (in and out can overlap).
*
* For kernel code, caller is responsible for ensuring kpreempt_disable()
* has been called. This is because %xmm registers are not saved/restored.
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
* on the stack.
*
* Temporary register usage:
* %xmm0 State
* %xmm1 Key
*
* Original OpenSolaris Interface:
* void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
* const uint32_t pt[4], uint32_t ct[4])/
*
* Original Intel OpenSSL Interface:
* void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
* const AES_KEY *key);
*/
ENTRY_NP(aes_decrypt_intel)
movups (%INP), %STATE // input
movups (%KEYP), %KEY // key
#ifdef OPENSSL_INTERFACE
mov 240(%KEYP), %NROUNDS32 // round count
#else /* OpenSolaris Interface */
/* Round count is already present as P2 in %rsi/%esi */
#endif /* OPENSSL_INTERFACE */
pxor %KEY, %STATE // round 0
lea 0x30(%KEYP), %KEYP
cmp $12, %NROUNDS
jb .Ldec128
lea 0x20(%KEYP), %KEYP
je .Ldec192
// AES 256
lea 0x20(%KEYP), %KEYP
movups -0x60(%KEYP), %KEY
aesdec %KEY, %STATE
movups -0x50(%KEYP), %KEY
aesdec %KEY, %STATE
.align 4
.Ldec192:
// AES 192 and 256
movups -0x40(%KEYP), %KEY
aesdec %KEY, %STATE
movups -0x30(%KEYP), %KEY
aesdec %KEY, %STATE
.align 4
.Ldec128:
// AES 128, 192, and 256
movups -0x20(%KEYP), %KEY
aesdec %KEY, %STATE
movups -0x10(%KEYP), %KEY
aesdec %KEY, %STATE
movups (%KEYP), %KEY
aesdec %KEY, %STATE
movups 0x10(%KEYP), %KEY
aesdec %KEY, %STATE
movups 0x20(%KEYP), %KEY
aesdec %KEY, %STATE
movups 0x30(%KEYP), %KEY
aesdec %KEY, %STATE
movups 0x40(%KEYP), %KEY
aesdec %KEY, %STATE
movups 0x50(%KEYP), %KEY
aesdec %KEY, %STATE
movups 0x60(%KEYP), %KEY
aesdec %KEY, %STATE
movups 0x70(%KEYP), %KEY
aesdeclast %KEY, %STATE // last round
movups %STATE, (%OUTP) // output
ret
SET_SIZE(aes_decrypt_intel)
#endif /* lint || __lint */
#ifdef __ELF__
.section .note.GNU-stack,"",%progbits
#endif