mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-30 21:09:38 +03:00
a73c94934f
Currently, the ICP contains accelerated assembly code to be used specifically on CPUs with AES-NI enabled. This code makes heavy use of the movaps instruction which assumes that it will be provided aes keys that are 16 byte aligned. This assumption seems to hold on Illumos, but on Linux some kernel options such as 'slub_debug=P' will violate it. This patch changes all instances of this instruction to movups which is the same except that it can handle unaligned memory. This patch also adds a few flags which were accidentally never given to the assembly compiler, resulting in objtool warnings. Reviewed by: Gvozden Neskovic <neskovic@gmail.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Nathaniel R. Lewis <linux.robotdude@gmail.com> Signed-off-by: Tom Caputi <tcaputi@datto.com> Closes #7065 Closes #7108
750 lines
22 KiB
ArmAsm
750 lines
22 KiB
ArmAsm
/*
|
|
* ====================================================================
|
|
* Written by Intel Corporation for the OpenSSL project to add support
|
|
* for Intel AES-NI instructions. Rights for redistribution and usage
|
|
* in source and binary forms are granted according to the OpenSSL
|
|
* license.
|
|
*
|
|
* Author: Huang Ying <ying.huang at intel dot com>
|
|
* Vinodh Gopal <vinodh.gopal at intel dot com>
|
|
* Kahraman Akdemir
|
|
*
|
|
* Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
|
|
* instructions that are going to be introduced in the next generation
|
|
* of Intel processor, as of 2009. These instructions enable fast and
|
|
* secure data encryption and decryption, using the Advanced Encryption
|
|
* Standard (AES), defined by FIPS Publication number 197. The
|
|
* architecture introduces six instructions that offer full hardware
|
|
* support for AES. Four of them support high performance data
|
|
* encryption and decryption, and the other two instructions support
|
|
* the AES key expansion procedure.
|
|
* ====================================================================
|
|
*/
|
|
|
|
/*
|
|
* ====================================================================
|
|
* Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* 3. All advertising materials mentioning features or use of this
|
|
* software must display the following acknowledgment:
|
|
* "This product includes software developed by the OpenSSL Project
|
|
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
|
*
|
|
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
|
* endorse or promote products derived from this software without
|
|
* prior written permission. For written permission, please contact
|
|
* openssl-core@openssl.org.
|
|
*
|
|
* 5. Products derived from this software may not be called "OpenSSL"
|
|
* nor may "OpenSSL" appear in their names without prior written
|
|
* permission of the OpenSSL Project.
|
|
*
|
|
* 6. Redistributions of any form whatsoever must retain the following
|
|
* acknowledgment:
|
|
* "This product includes software developed by the OpenSSL Project
|
|
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
|
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
* ====================================================================
|
|
*/
|
|
|
|
/*
|
|
* ====================================================================
|
|
* OpenSolaris OS modifications
|
|
*
|
|
* This source originates as files aes-intel.S and eng_aesni_asm.pl, in
|
|
* patches sent sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
|
|
* Huang Ying of Intel to the openssl-dev mailing list under the subject
|
|
* of "Add support to Intel AES-NI instruction set for x86_64 platform".
|
|
*
|
|
* This OpenSolaris version has these major changes from the original source:
|
|
*
|
|
* 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
|
|
* /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
|
|
* definitions for lint.
|
|
*
|
|
* 2. Formatted code, added comments, and added #includes and #defines.
|
|
*
|
|
* 3. If bit CR0.TS is set, clear and set the TS bit, after and before
|
|
* calling kpreempt_disable() and kpreempt_enable().
|
|
* If the TS bit is not set, Save and restore %xmm registers at the beginning
|
|
* and end of function calls (%xmm* registers are not saved and restored by
|
|
* during kernel thread preemption).
|
|
*
|
|
* 4. Renamed functions, reordered parameters, and changed return value
|
|
* to match OpenSolaris:
|
|
*
|
|
* OpenSSL interface:
|
|
* int intel_AES_set_encrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* int intel_AES_set_decrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* Return values for above are non-zero on error, 0 on success.
|
|
*
|
|
* void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key);
|
|
* void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key);
|
|
* typedef struct aes_key_st {
|
|
* unsigned int rd_key[4 *(AES_MAXNR + 1)];
|
|
* int rounds;
|
|
* unsigned int pad[3];
|
|
* } AES_KEY;
|
|
* Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
|
|
* (ks32) instead of 64-bit (ks64).
|
|
* Number of rounds (aka round count) is at offset 240 of AES_KEY.
|
|
*
|
|
* OpenSolaris OS interface (#ifdefs removed for readability):
|
|
* int rijndael_key_setup_dec_intel(uint32_t rk[],
|
|
* const uint32_t cipherKey[], uint64_t keyBits);
|
|
* int rijndael_key_setup_enc_intel(uint32_t rk[],
|
|
* const uint32_t cipherKey[], uint64_t keyBits);
|
|
* Return values for above are 0 on error, number of rounds on success.
|
|
*
|
|
* void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t pt[4], uint32_t ct[4]);
|
|
* void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t pt[4], uint32_t ct[4]);
|
|
* typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
|
|
* uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
|
|
*
|
|
* typedef union {
|
|
* uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
|
|
* } aes_ks_t;
|
|
* typedef struct aes_key {
|
|
* aes_ks_t encr_ks, decr_ks;
|
|
* long double align128;
|
|
* int flags, nr, type;
|
|
* } aes_key_t;
|
|
*
|
|
* Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
|
|
* ct is crypto text, and MAX_AES_NR is 14.
|
|
* For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
|
|
*
|
|
* Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
|
|
*
|
|
* ====================================================================
|
|
*/
|
|
|
|
|
|
#if defined(lint) || defined(__lint)
|
|
|
|
#include <sys/types.h>
|
|
|
|
/* ARGSUSED */
|
|
void
|
|
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
|
|
uint32_t ct[4]) {
|
|
}
|
|
/* ARGSUSED */
|
|
void
|
|
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
|
|
uint32_t pt[4]) {
|
|
}
|
|
/* ARGSUSED */
|
|
int
|
|
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
|
|
uint64_t keyBits) {
|
|
return (0);
|
|
}
|
|
/* ARGSUSED */
|
|
int
|
|
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
|
|
uint64_t keyBits) {
|
|
return (0);
|
|
}
|
|
|
|
|
|
#else /* lint */
|
|
|
|
#define _ASM
|
|
#include <sys/asm_linkage.h>
|
|
|
|
|
|
/*
|
|
* _key_expansion_128(), * _key_expansion_192a(), _key_expansion_192b(),
|
|
* _key_expansion_256a(), _key_expansion_256b()
|
|
*
|
|
* Helper functions called by rijndael_key_setup_inc_intel().
|
|
* Also used indirectly by rijndael_key_setup_dec_intel().
|
|
*
|
|
* Input:
|
|
* %xmm0 User-provided cipher key
|
|
* %xmm1 Round constant
|
|
* Output:
|
|
* (%rcx) AES key
|
|
*/
|
|
|
|
ENTRY_NP2(_key_expansion_128, _key_expansion_256a)
|
|
_key_expansion_128_local:
|
|
_key_expansion_256a_local:
|
|
pshufd $0b11111111, %xmm1, %xmm1
|
|
shufps $0b00010000, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
shufps $0b10001100, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
pxor %xmm1, %xmm0
|
|
movups %xmm0, (%rcx)
|
|
add $0x10, %rcx
|
|
ret
|
|
nop
|
|
SET_SIZE(_key_expansion_128)
|
|
SET_SIZE(_key_expansion_256a)
|
|
|
|
|
|
ENTRY_NP(_key_expansion_192a)
|
|
_key_expansion_192a_local:
|
|
pshufd $0b01010101, %xmm1, %xmm1
|
|
shufps $0b00010000, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
shufps $0b10001100, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
pxor %xmm1, %xmm0
|
|
|
|
movups %xmm2, %xmm5
|
|
movups %xmm2, %xmm6
|
|
pslldq $4, %xmm5
|
|
pshufd $0b11111111, %xmm0, %xmm3
|
|
pxor %xmm3, %xmm2
|
|
pxor %xmm5, %xmm2
|
|
|
|
movups %xmm0, %xmm1
|
|
shufps $0b01000100, %xmm0, %xmm6
|
|
movups %xmm6, (%rcx)
|
|
shufps $0b01001110, %xmm2, %xmm1
|
|
movups %xmm1, 0x10(%rcx)
|
|
add $0x20, %rcx
|
|
ret
|
|
SET_SIZE(_key_expansion_192a)
|
|
|
|
|
|
ENTRY_NP(_key_expansion_192b)
|
|
_key_expansion_192b_local:
|
|
pshufd $0b01010101, %xmm1, %xmm1
|
|
shufps $0b00010000, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
shufps $0b10001100, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
pxor %xmm1, %xmm0
|
|
|
|
movups %xmm2, %xmm5
|
|
pslldq $4, %xmm5
|
|
pshufd $0b11111111, %xmm0, %xmm3
|
|
pxor %xmm3, %xmm2
|
|
pxor %xmm5, %xmm2
|
|
|
|
movups %xmm0, (%rcx)
|
|
add $0x10, %rcx
|
|
ret
|
|
SET_SIZE(_key_expansion_192b)
|
|
|
|
|
|
ENTRY_NP(_key_expansion_256b)
|
|
_key_expansion_256b_local:
|
|
pshufd $0b10101010, %xmm1, %xmm1
|
|
shufps $0b00010000, %xmm2, %xmm4
|
|
pxor %xmm4, %xmm2
|
|
shufps $0b10001100, %xmm2, %xmm4
|
|
pxor %xmm4, %xmm2
|
|
pxor %xmm1, %xmm2
|
|
movups %xmm2, (%rcx)
|
|
add $0x10, %rcx
|
|
ret
|
|
SET_SIZE(_key_expansion_256b)
|
|
|
|
|
|
/*
|
|
* rijndael_key_setup_enc_intel()
|
|
* Expand the cipher key into the encryption key schedule.
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* OpenSolaris interface:
|
|
* int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
|
|
* uint64_t keyBits);
|
|
* Return value is 0 on error, number of rounds on success.
|
|
*
|
|
* Original Intel OpenSSL interface:
|
|
* int intel_AES_set_encrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* Return value is non-zero on error, 0 on success.
|
|
*/
|
|
|
|
#ifdef OPENSSL_INTERFACE
|
|
#define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key
|
|
#define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key
|
|
|
|
#define USERCIPHERKEY rdi /* P1, 64 bits */
|
|
#define KEYSIZE32 esi /* P2, 32 bits */
|
|
#define KEYSIZE64 rsi /* P2, 64 bits */
|
|
#define AESKEY rdx /* P3, 64 bits */
|
|
|
|
#else /* OpenSolaris Interface */
|
|
#define AESKEY rdi /* P1, 64 bits */
|
|
#define USERCIPHERKEY rsi /* P2, 64 bits */
|
|
#define KEYSIZE32 edx /* P3, 32 bits */
|
|
#define KEYSIZE64 rdx /* P3, 64 bits */
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
#define ROUNDS32 KEYSIZE32 /* temp */
|
|
#define ROUNDS64 KEYSIZE64 /* temp */
|
|
#define ENDAESKEY USERCIPHERKEY /* temp */
|
|
|
|
ENTRY_NP(rijndael_key_setup_enc_intel)
|
|
rijndael_key_setup_enc_intel_local:
|
|
FRAME_BEGIN
|
|
// NULL pointer sanity check
|
|
test %USERCIPHERKEY, %USERCIPHERKEY
|
|
jz .Lenc_key_invalid_param
|
|
test %AESKEY, %AESKEY
|
|
jz .Lenc_key_invalid_param
|
|
|
|
movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes)
|
|
movups %xmm0, (%AESKEY)
|
|
lea 0x10(%AESKEY), %rcx // key addr
|
|
pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x
|
|
|
|
cmp $256, %KEYSIZE32
|
|
jnz .Lenc_key192
|
|
|
|
// AES 256: 14 rounds in encryption key schedule
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov $14, %ROUNDS32
|
|
movl %ROUNDS32, 240(%AESKEY) // key.rounds = 14
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes)
|
|
movups %xmm2, (%rcx)
|
|
add $0x10, %rcx
|
|
|
|
aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a_local
|
|
aeskeygenassist $0x1, %xmm0, %xmm1
|
|
call _key_expansion_256b_local
|
|
aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a_local
|
|
aeskeygenassist $0x2, %xmm0, %xmm1
|
|
call _key_expansion_256b_local
|
|
aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a_local
|
|
aeskeygenassist $0x4, %xmm0, %xmm1
|
|
call _key_expansion_256b_local
|
|
aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a_local
|
|
aeskeygenassist $0x8, %xmm0, %xmm1
|
|
call _key_expansion_256b_local
|
|
aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a_local
|
|
aeskeygenassist $0x10, %xmm0, %xmm1
|
|
call _key_expansion_256b_local
|
|
aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a_local
|
|
aeskeygenassist $0x20, %xmm0, %xmm1
|
|
call _key_expansion_256b_local
|
|
aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a_local
|
|
|
|
#ifdef OPENSSL_INTERFACE
|
|
xor %rax, %rax // return 0 (OK)
|
|
#else /* Open Solaris Interface */
|
|
mov $14, %rax // return # rounds = 14
|
|
#endif
|
|
FRAME_END
|
|
ret
|
|
|
|
.align 4
|
|
.Lenc_key192:
|
|
cmp $192, %KEYSIZE32
|
|
jnz .Lenc_key128
|
|
|
|
// AES 192: 12 rounds in encryption key schedule
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov $12, %ROUNDS32
|
|
movl %ROUNDS32, 240(%AESKEY) // key.rounds = 12
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
movq 0x10(%USERCIPHERKEY), %xmm2 // other user key
|
|
aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192a_local
|
|
aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192b_local
|
|
aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192a_local
|
|
aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192b_local
|
|
aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192a_local
|
|
aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192b_local
|
|
aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192a_local
|
|
aeskeygenassist $0x80, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192b_local
|
|
|
|
#ifdef OPENSSL_INTERFACE
|
|
xor %rax, %rax // return 0 (OK)
|
|
#else /* OpenSolaris Interface */
|
|
mov $12, %rax // return # rounds = 12
|
|
#endif
|
|
FRAME_END
|
|
ret
|
|
|
|
.align 4
|
|
.Lenc_key128:
|
|
cmp $128, %KEYSIZE32
|
|
jnz .Lenc_key_invalid_key_bits
|
|
|
|
// AES 128: 10 rounds in encryption key schedule
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov $10, %ROUNDS32
|
|
movl %ROUNDS32, 240(%AESKEY) // key.rounds = 10
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
aeskeygenassist $0x1, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128_local
|
|
aeskeygenassist $0x2, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128_local
|
|
aeskeygenassist $0x4, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128_local
|
|
aeskeygenassist $0x8, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128_local
|
|
aeskeygenassist $0x10, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128_local
|
|
aeskeygenassist $0x20, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128_local
|
|
aeskeygenassist $0x40, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128_local
|
|
aeskeygenassist $0x80, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128_local
|
|
aeskeygenassist $0x1b, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128_local
|
|
aeskeygenassist $0x36, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128_local
|
|
|
|
#ifdef OPENSSL_INTERFACE
|
|
xor %rax, %rax // return 0 (OK)
|
|
#else /* OpenSolaris Interface */
|
|
mov $10, %rax // return # rounds = 10
|
|
#endif
|
|
FRAME_END
|
|
ret
|
|
|
|
.Lenc_key_invalid_param:
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov $-1, %rax // user key or AES key pointer is NULL
|
|
FRAME_END
|
|
ret
|
|
#else
|
|
/* FALLTHROUGH */
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
.Lenc_key_invalid_key_bits:
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov $-2, %rax // keysize is invalid
|
|
#else /* Open Solaris Interface */
|
|
xor %rax, %rax // a key pointer is NULL or invalid keysize
|
|
#endif /* OPENSSL_INTERFACE */
|
|
FRAME_END
|
|
ret
|
|
SET_SIZE(rijndael_key_setup_enc_intel)
|
|
|
|
|
|
/*
|
|
* rijndael_key_setup_dec_intel()
|
|
* Expand the cipher key into the decryption key schedule.
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* OpenSolaris interface:
|
|
* int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
|
|
* uint64_t keyBits);
|
|
* Return value is 0 on error, number of rounds on success.
|
|
* P1->P2, P2->P3, P3->P1
|
|
*
|
|
* Original Intel OpenSSL interface:
|
|
* int intel_AES_set_decrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* Return value is non-zero on error, 0 on success.
|
|
*/
|
|
|
|
ENTRY_NP(rijndael_key_setup_dec_intel)
|
|
FRAME_BEGIN
|
|
// Generate round keys used for encryption
|
|
call rijndael_key_setup_enc_intel_local
|
|
test %rax, %rax
|
|
#ifdef OPENSSL_INTERFACE
|
|
jnz .Ldec_key_exit // Failed if returned non-0
|
|
#else /* OpenSolaris Interface */
|
|
jz .Ldec_key_exit // Failed if returned 0
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
/*
|
|
* Convert round keys used for encryption
|
|
* to a form usable for decryption
|
|
*/
|
|
#ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */
|
|
mov %rax, %ROUNDS64 // set # rounds (10, 12, or 14)
|
|
// (already set for OpenSSL)
|
|
#endif
|
|
|
|
lea 0x10(%AESKEY), %rcx // key addr
|
|
shl $4, %ROUNDS32
|
|
add %AESKEY, %ROUNDS64
|
|
mov %ROUNDS64, %ENDAESKEY
|
|
|
|
.align 4
|
|
.Ldec_key_reorder_loop:
|
|
movups (%AESKEY), %xmm0
|
|
movups (%ROUNDS64), %xmm1
|
|
movups %xmm0, (%ROUNDS64)
|
|
movups %xmm1, (%AESKEY)
|
|
lea 0x10(%AESKEY), %AESKEY
|
|
lea -0x10(%ROUNDS64), %ROUNDS64
|
|
cmp %AESKEY, %ROUNDS64
|
|
ja .Ldec_key_reorder_loop
|
|
|
|
.align 4
|
|
.Ldec_key_inv_loop:
|
|
movups (%rcx), %xmm0
|
|
// Convert an encryption round key to a form usable for decryption
|
|
// with the "AES Inverse Mix Columns" instruction
|
|
aesimc %xmm0, %xmm1
|
|
movups %xmm1, (%rcx)
|
|
lea 0x10(%rcx), %rcx
|
|
cmp %ENDAESKEY, %rcx
|
|
jnz .Ldec_key_inv_loop
|
|
|
|
.Ldec_key_exit:
|
|
// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
|
|
// OpenSSL: rax = 0 for OK, or non-zero for error
|
|
FRAME_END
|
|
ret
|
|
SET_SIZE(rijndael_key_setup_dec_intel)
|
|
|
|
|
|
/*
|
|
* aes_encrypt_intel()
|
|
* Encrypt a single block (in and out can overlap).
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* Temporary register usage:
|
|
* %xmm0 State
|
|
* %xmm1 Key
|
|
*
|
|
* Original OpenSolaris Interface:
|
|
* void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t pt[4], uint32_t ct[4])
|
|
*
|
|
* Original Intel OpenSSL Interface:
|
|
* void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key)
|
|
*/
|
|
|
|
#ifdef OPENSSL_INTERFACE
|
|
#define aes_encrypt_intel intel_AES_encrypt
|
|
#define aes_decrypt_intel intel_AES_decrypt
|
|
|
|
#define INP rdi /* P1, 64 bits */
|
|
#define OUTP rsi /* P2, 64 bits */
|
|
#define KEYP rdx /* P3, 64 bits */
|
|
|
|
/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */
|
|
#define NROUNDS32 ecx /* temporary, 32 bits */
|
|
#define NROUNDS cl /* temporary, 8 bits */
|
|
|
|
#else /* OpenSolaris Interface */
|
|
#define KEYP rdi /* P1, 64 bits */
|
|
#define NROUNDS esi /* P2, 32 bits */
|
|
#define INP rdx /* P3, 64 bits */
|
|
#define OUTP rcx /* P4, 64 bits */
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
#define STATE xmm0 /* temporary, 128 bits */
|
|
#define KEY xmm1 /* temporary, 128 bits */
|
|
|
|
|
|
ENTRY_NP(aes_encrypt_intel)
|
|
|
|
movups (%INP), %STATE // input
|
|
movups (%KEYP), %KEY // key
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov 240(%KEYP), %NROUNDS32 // round count
|
|
#else /* OpenSolaris Interface */
|
|
/* Round count is already present as P2 in %rsi/%esi */
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
pxor %KEY, %STATE // round 0
|
|
lea 0x30(%KEYP), %KEYP
|
|
cmp $12, %NROUNDS
|
|
jb .Lenc128
|
|
lea 0x20(%KEYP), %KEYP
|
|
je .Lenc192
|
|
|
|
// AES 256
|
|
lea 0x20(%KEYP), %KEYP
|
|
movups -0x60(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movups -0x50(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
|
|
.align 4
|
|
.Lenc192:
|
|
// AES 192 and 256
|
|
movups -0x40(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movups -0x30(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
|
|
.align 4
|
|
.Lenc128:
|
|
// AES 128, 192, and 256
|
|
movups -0x20(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movups -0x10(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movups (%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movups 0x10(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movups 0x20(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movups 0x30(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movups 0x40(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movups 0x50(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movups 0x60(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movups 0x70(%KEYP), %KEY
|
|
aesenclast %KEY, %STATE // last round
|
|
movups %STATE, (%OUTP) // output
|
|
|
|
ret
|
|
SET_SIZE(aes_encrypt_intel)
|
|
|
|
|
|
/*
|
|
* aes_decrypt_intel()
|
|
* Decrypt a single block (in and out can overlap).
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* Temporary register usage:
|
|
* %xmm0 State
|
|
* %xmm1 Key
|
|
*
|
|
* Original OpenSolaris Interface:
|
|
* void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t pt[4], uint32_t ct[4])/
|
|
*
|
|
* Original Intel OpenSSL Interface:
|
|
* void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key);
|
|
*/
|
|
ENTRY_NP(aes_decrypt_intel)
|
|
|
|
movups (%INP), %STATE // input
|
|
movups (%KEYP), %KEY // key
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov 240(%KEYP), %NROUNDS32 // round count
|
|
#else /* OpenSolaris Interface */
|
|
/* Round count is already present as P2 in %rsi/%esi */
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
pxor %KEY, %STATE // round 0
|
|
lea 0x30(%KEYP), %KEYP
|
|
cmp $12, %NROUNDS
|
|
jb .Ldec128
|
|
lea 0x20(%KEYP), %KEYP
|
|
je .Ldec192
|
|
|
|
// AES 256
|
|
lea 0x20(%KEYP), %KEYP
|
|
movups -0x60(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movups -0x50(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
|
|
.align 4
|
|
.Ldec192:
|
|
// AES 192 and 256
|
|
movups -0x40(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movups -0x30(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
|
|
.align 4
|
|
.Ldec128:
|
|
// AES 128, 192, and 256
|
|
movups -0x20(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movups -0x10(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movups (%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movups 0x10(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movups 0x20(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movups 0x30(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movups 0x40(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movups 0x50(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movups 0x60(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movups 0x70(%KEYP), %KEY
|
|
aesdeclast %KEY, %STATE // last round
|
|
movups %STATE, (%OUTP) // output
|
|
|
|
ret
|
|
SET_SIZE(aes_decrypt_intel)
|
|
|
|
#endif /* lint || __lint */
|
|
|
|
#ifdef __ELF__
|
|
.section .note.GNU-stack,"",%progbits
|
|
#endif
|