mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-29 12:29:35 +03:00
a3600a106d
If there is no explicit note in the .S files, the obj file will mark it as requiring an executable stack. This is unneeded and causes issues on hardened systems. More info: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart Signed-off-by: Jason Zaman <jason@perfinion.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #4947 Closes #4962
856 lines
24 KiB
ArmAsm
856 lines
24 KiB
ArmAsm
/*
|
|
* ====================================================================
|
|
* Written by Intel Corporation for the OpenSSL project to add support
|
|
* for Intel AES-NI instructions. Rights for redistribution and usage
|
|
* in source and binary forms are granted according to the OpenSSL
|
|
* license.
|
|
*
|
|
* Author: Huang Ying <ying.huang at intel dot com>
|
|
* Vinodh Gopal <vinodh.gopal at intel dot com>
|
|
* Kahraman Akdemir
|
|
*
|
|
* Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
|
|
* instructions that are going to be introduced in the next generation
|
|
* of Intel processor, as of 2009. These instructions enable fast and
|
|
* secure data encryption and decryption, using the Advanced Encryption
|
|
* Standard (AES), defined by FIPS Publication number 197. The
|
|
* architecture introduces six instructions that offer full hardware
|
|
* support for AES. Four of them support high performance data
|
|
* encryption and decryption, and the other two instructions support
|
|
* the AES key expansion procedure.
|
|
* ====================================================================
|
|
*/
|
|
|
|
/*
|
|
* ====================================================================
|
|
* Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* 3. All advertising materials mentioning features or use of this
|
|
* software must display the following acknowledgment:
|
|
* "This product includes software developed by the OpenSSL Project
|
|
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
|
*
|
|
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
|
* endorse or promote products derived from this software without
|
|
* prior written permission. For written permission, please contact
|
|
* openssl-core@openssl.org.
|
|
*
|
|
* 5. Products derived from this software may not be called "OpenSSL"
|
|
* nor may "OpenSSL" appear in their names without prior written
|
|
* permission of the OpenSSL Project.
|
|
*
|
|
* 6. Redistributions of any form whatsoever must retain the following
|
|
* acknowledgment:
|
|
* "This product includes software developed by the OpenSSL Project
|
|
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
|
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
* ====================================================================
|
|
*/
|
|
|
|
/*
|
|
* ====================================================================
|
|
* OpenSolaris OS modifications
|
|
*
|
|
* This source originates as files aes-intel.S and eng_aesni_asm.pl, in
|
|
* patches sent sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
|
|
* Huang Ying of Intel to the openssl-dev mailing list under the subject
|
|
* of "Add support to Intel AES-NI instruction set for x86_64 platform".
|
|
*
|
|
* This OpenSolaris version has these major changes from the original source:
|
|
*
|
|
* 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
|
|
* /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
|
|
* definitions for lint.
|
|
*
|
|
* 2. Formatted code, added comments, and added #includes and #defines.
|
|
*
|
|
* 3. If bit CR0.TS is set, clear and set the TS bit, after and before
|
|
* calling kpreempt_disable() and kpreempt_enable().
|
|
* If the TS bit is not set, Save and restore %xmm registers at the beginning
|
|
* and end of function calls (%xmm* registers are not saved and restored by
|
|
* during kernel thread preemption).
|
|
*
|
|
* 4. Renamed functions, reordered parameters, and changed return value
|
|
* to match OpenSolaris:
|
|
*
|
|
* OpenSSL interface:
|
|
* int intel_AES_set_encrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* int intel_AES_set_decrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* Return values for above are non-zero on error, 0 on success.
|
|
*
|
|
* void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key);
|
|
* void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key);
|
|
* typedef struct aes_key_st {
|
|
* unsigned int rd_key[4 *(AES_MAXNR + 1)];
|
|
* int rounds;
|
|
* unsigned int pad[3];
|
|
* } AES_KEY;
|
|
* Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
|
|
* (ks32) instead of 64-bit (ks64).
|
|
* Number of rounds (aka round count) is at offset 240 of AES_KEY.
|
|
*
|
|
* OpenSolaris OS interface (#ifdefs removed for readability):
|
|
* int rijndael_key_setup_dec_intel(uint32_t rk[],
|
|
* const uint32_t cipherKey[], uint64_t keyBits);
|
|
* int rijndael_key_setup_enc_intel(uint32_t rk[],
|
|
* const uint32_t cipherKey[], uint64_t keyBits);
|
|
* Return values for above are 0 on error, number of rounds on success.
|
|
*
|
|
* void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t pt[4], uint32_t ct[4]);
|
|
* void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t pt[4], uint32_t ct[4]);
|
|
* typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
|
|
* uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
|
|
*
|
|
* typedef union {
|
|
* uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
|
|
* } aes_ks_t;
|
|
* typedef struct aes_key {
|
|
* aes_ks_t encr_ks, decr_ks;
|
|
* long double align128;
|
|
* int flags, nr, type;
|
|
* } aes_key_t;
|
|
*
|
|
* Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
|
|
* ct is crypto text, and MAX_AES_NR is 14.
|
|
* For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
|
|
*
|
|
* Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
|
|
*
|
|
* ====================================================================
|
|
*/
|
|
|
|
#if defined(lint) || defined(__lint)
|
|
|
|
#include <sys/types.h>
|
|
|
|
/* ARGSUSED */
|
|
void
|
|
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
|
|
uint32_t ct[4]) {
|
|
}
|
|
/* ARGSUSED */
|
|
void
|
|
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
|
|
uint32_t pt[4]) {
|
|
}
|
|
/* ARGSUSED */
|
|
int
|
|
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
|
|
uint64_t keyBits) {
|
|
return (0);
|
|
}
|
|
/* ARGSUSED */
|
|
int
|
|
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
|
|
uint64_t keyBits) {
|
|
return (0);
|
|
}
|
|
|
|
|
|
#else /* lint */
|
|
|
|
#define _ASM
|
|
#include <sys/asm_linkage.h>
|
|
|
|
#ifdef _KERNEL
|
|
/*
|
|
* Note: the CLTS macro clobbers P2 (%rsi) under i86xpv. That is,
|
|
* it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
|
|
* uses it to pass P2 to syscall.
|
|
* This also occurs with the STTS macro, but we dont care if
|
|
* P2 (%rsi) is modified just before function exit.
|
|
* The CLTS and STTS macros push and pop P1 (%rdi) already.
|
|
*/
|
|
#ifdef __xpv
|
|
#define PROTECTED_CLTS \
|
|
push %rsi; \
|
|
CLTS; \
|
|
pop %rsi
|
|
#else
|
|
#define PROTECTED_CLTS \
|
|
CLTS
|
|
#endif /* __xpv */
|
|
|
|
#define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
|
|
push %rbp; \
|
|
mov %rsp, %rbp; \
|
|
movq %cr0, tmpreg; \
|
|
testq $CR0_TS, tmpreg; \
|
|
jnz 1f; \
|
|
and $-XMM_ALIGN, %rsp; \
|
|
sub $[XMM_SIZE * 2], %rsp; \
|
|
movaps %xmm0, 16(%rsp); \
|
|
movaps %xmm1, (%rsp); \
|
|
jmp 2f; \
|
|
1: \
|
|
PROTECTED_CLTS; \
|
|
2:
|
|
|
|
/*
|
|
* If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
|
|
* otherwise set CR0_TS.
|
|
*/
|
|
#define SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
|
|
testq $CR0_TS, tmpreg; \
|
|
jnz 1f; \
|
|
movaps (%rsp), %xmm1; \
|
|
movaps 16(%rsp), %xmm0; \
|
|
jmp 2f; \
|
|
1: \
|
|
STTS(tmpreg); \
|
|
2: \
|
|
mov %rbp, %rsp; \
|
|
pop %rbp
|
|
|
|
/*
|
|
* If CR0_TS is not set, align stack (with push %rbp) and push
|
|
* %xmm0 - %xmm6 on stack, otherwise clear CR0_TS
|
|
*/
|
|
#define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
|
|
push %rbp; \
|
|
mov %rsp, %rbp; \
|
|
movq %cr0, tmpreg; \
|
|
testq $CR0_TS, tmpreg; \
|
|
jnz 1f; \
|
|
and $-XMM_ALIGN, %rsp; \
|
|
sub $[XMM_SIZE * 7], %rsp; \
|
|
movaps %xmm0, 96(%rsp); \
|
|
movaps %xmm1, 80(%rsp); \
|
|
movaps %xmm2, 64(%rsp); \
|
|
movaps %xmm3, 48(%rsp); \
|
|
movaps %xmm4, 32(%rsp); \
|
|
movaps %xmm5, 16(%rsp); \
|
|
movaps %xmm6, (%rsp); \
|
|
jmp 2f; \
|
|
1: \
|
|
PROTECTED_CLTS; \
|
|
2:
|
|
|
|
|
|
/*
|
|
* If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
|
|
* otherwise set CR0_TS.
|
|
*/
|
|
#define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
|
|
testq $CR0_TS, tmpreg; \
|
|
jnz 1f; \
|
|
movaps (%rsp), %xmm6; \
|
|
movaps 16(%rsp), %xmm5; \
|
|
movaps 32(%rsp), %xmm4; \
|
|
movaps 48(%rsp), %xmm3; \
|
|
movaps 64(%rsp), %xmm2; \
|
|
movaps 80(%rsp), %xmm1; \
|
|
movaps 96(%rsp), %xmm0; \
|
|
jmp 2f; \
|
|
1: \
|
|
STTS(tmpreg); \
|
|
2: \
|
|
mov %rbp, %rsp; \
|
|
pop %rbp
|
|
|
|
|
|
#else
|
|
#define PROTECTED_CLTS
|
|
#define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
|
|
#define SET_TS_OR_POP_XMM0_XMM1(tmpreg)
|
|
#define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
|
|
#define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
|
|
#endif /* _KERNEL */
|
|
|
|
|
|
/*
|
|
* _key_expansion_128(), * _key_expansion_192a(), _key_expansion_192b(),
|
|
* _key_expansion_256a(), _key_expansion_256b()
|
|
*
|
|
* Helper functions called by rijndael_key_setup_inc_intel().
|
|
* Also used indirectly by rijndael_key_setup_dec_intel().
|
|
*
|
|
* Input:
|
|
* %xmm0 User-provided cipher key
|
|
* %xmm1 Round constant
|
|
* Output:
|
|
* (%rcx) AES key
|
|
*/
|
|
|
|
.align 16
|
|
_key_expansion_128:
|
|
_key_expansion_256a:
|
|
pshufd $0b11111111, %xmm1, %xmm1
|
|
shufps $0b00010000, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
shufps $0b10001100, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
pxor %xmm1, %xmm0
|
|
movaps %xmm0, (%rcx)
|
|
add $0x10, %rcx
|
|
ret
|
|
SET_SIZE(_key_expansion_128)
|
|
SET_SIZE(_key_expansion_256a)
|
|
|
|
.align 16
|
|
_key_expansion_192a:
|
|
pshufd $0b01010101, %xmm1, %xmm1
|
|
shufps $0b00010000, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
shufps $0b10001100, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
pxor %xmm1, %xmm0
|
|
|
|
movaps %xmm2, %xmm5
|
|
movaps %xmm2, %xmm6
|
|
pslldq $4, %xmm5
|
|
pshufd $0b11111111, %xmm0, %xmm3
|
|
pxor %xmm3, %xmm2
|
|
pxor %xmm5, %xmm2
|
|
|
|
movaps %xmm0, %xmm1
|
|
shufps $0b01000100, %xmm0, %xmm6
|
|
movaps %xmm6, (%rcx)
|
|
shufps $0b01001110, %xmm2, %xmm1
|
|
movaps %xmm1, 0x10(%rcx)
|
|
add $0x20, %rcx
|
|
ret
|
|
SET_SIZE(_key_expansion_192a)
|
|
|
|
.align 16
|
|
_key_expansion_192b:
|
|
pshufd $0b01010101, %xmm1, %xmm1
|
|
shufps $0b00010000, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
shufps $0b10001100, %xmm0, %xmm4
|
|
pxor %xmm4, %xmm0
|
|
pxor %xmm1, %xmm0
|
|
|
|
movaps %xmm2, %xmm5
|
|
pslldq $4, %xmm5
|
|
pshufd $0b11111111, %xmm0, %xmm3
|
|
pxor %xmm3, %xmm2
|
|
pxor %xmm5, %xmm2
|
|
|
|
movaps %xmm0, (%rcx)
|
|
add $0x10, %rcx
|
|
ret
|
|
SET_SIZE(_key_expansion_192b)
|
|
|
|
.align 16
|
|
_key_expansion_256b:
|
|
pshufd $0b10101010, %xmm1, %xmm1
|
|
shufps $0b00010000, %xmm2, %xmm4
|
|
pxor %xmm4, %xmm2
|
|
shufps $0b10001100, %xmm2, %xmm4
|
|
pxor %xmm4, %xmm2
|
|
pxor %xmm1, %xmm2
|
|
movaps %xmm2, (%rcx)
|
|
add $0x10, %rcx
|
|
ret
|
|
SET_SIZE(_key_expansion_256b)
|
|
|
|
|
|
/*
|
|
* rijndael_key_setup_enc_intel()
|
|
* Expand the cipher key into the encryption key schedule.
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* OpenSolaris interface:
|
|
* int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
|
|
* uint64_t keyBits);
|
|
* Return value is 0 on error, number of rounds on success.
|
|
*
|
|
* Original Intel OpenSSL interface:
|
|
* int intel_AES_set_encrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* Return value is non-zero on error, 0 on success.
|
|
*/
|
|
|
|
#ifdef OPENSSL_INTERFACE
|
|
#define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key
|
|
#define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key
|
|
|
|
#define USERCIPHERKEY rdi /* P1, 64 bits */
|
|
#define KEYSIZE32 esi /* P2, 32 bits */
|
|
#define KEYSIZE64 rsi /* P2, 64 bits */
|
|
#define AESKEY rdx /* P3, 64 bits */
|
|
|
|
#else /* OpenSolaris Interface */
|
|
#define AESKEY rdi /* P1, 64 bits */
|
|
#define USERCIPHERKEY rsi /* P2, 64 bits */
|
|
#define KEYSIZE32 edx /* P3, 32 bits */
|
|
#define KEYSIZE64 rdx /* P3, 64 bits */
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
#define ROUNDS32 KEYSIZE32 /* temp */
|
|
#define ROUNDS64 KEYSIZE64 /* temp */
|
|
#define ENDAESKEY USERCIPHERKEY /* temp */
|
|
|
|
ENTRY_NP(rijndael_key_setup_enc_intel)
|
|
rijndael_key_setup_enc_intel_local:
|
|
CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10)
|
|
|
|
// NULL pointer sanity check
|
|
test %USERCIPHERKEY, %USERCIPHERKEY
|
|
jz .Lenc_key_invalid_param
|
|
test %AESKEY, %AESKEY
|
|
jz .Lenc_key_invalid_param
|
|
|
|
movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes)
|
|
movaps %xmm0, (%AESKEY)
|
|
lea 0x10(%AESKEY), %rcx // key addr
|
|
pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x
|
|
|
|
cmp $256, %KEYSIZE32
|
|
jnz .Lenc_key192
|
|
|
|
// AES 256: 14 rounds in encryption key schedule
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov $14, %ROUNDS32
|
|
movl %ROUNDS32, 240(%AESKEY) // key.rounds = 14
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes)
|
|
movaps %xmm2, (%rcx)
|
|
add $0x10, %rcx
|
|
|
|
aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a
|
|
aeskeygenassist $0x1, %xmm0, %xmm1
|
|
call _key_expansion_256b
|
|
aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a
|
|
aeskeygenassist $0x2, %xmm0, %xmm1
|
|
call _key_expansion_256b
|
|
aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a
|
|
aeskeygenassist $0x4, %xmm0, %xmm1
|
|
call _key_expansion_256b
|
|
aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a
|
|
aeskeygenassist $0x8, %xmm0, %xmm1
|
|
call _key_expansion_256b
|
|
aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a
|
|
aeskeygenassist $0x10, %xmm0, %xmm1
|
|
call _key_expansion_256b
|
|
aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a
|
|
aeskeygenassist $0x20, %xmm0, %xmm1
|
|
call _key_expansion_256b
|
|
aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_256a
|
|
|
|
SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
|
|
#ifdef OPENSSL_INTERFACE
|
|
xor %rax, %rax // return 0 (OK)
|
|
#else /* Open Solaris Interface */
|
|
mov $14, %rax // return # rounds = 14
|
|
#endif
|
|
ret
|
|
|
|
.align 4
|
|
.Lenc_key192:
|
|
cmp $192, %KEYSIZE32
|
|
jnz .Lenc_key128
|
|
|
|
// AES 192: 12 rounds in encryption key schedule
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov $12, %ROUNDS32
|
|
movl %ROUNDS32, 240(%AESKEY) // key.rounds = 12
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
movq 0x10(%USERCIPHERKEY), %xmm2 // other user key
|
|
aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192a
|
|
aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192b
|
|
aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192a
|
|
aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192b
|
|
aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192a
|
|
aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192b
|
|
aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192a
|
|
aeskeygenassist $0x80, %xmm2, %xmm1 // expand the key
|
|
call _key_expansion_192b
|
|
|
|
SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
|
|
#ifdef OPENSSL_INTERFACE
|
|
xor %rax, %rax // return 0 (OK)
|
|
#else /* OpenSolaris Interface */
|
|
mov $12, %rax // return # rounds = 12
|
|
#endif
|
|
ret
|
|
|
|
.align 4
|
|
.Lenc_key128:
|
|
cmp $128, %KEYSIZE32
|
|
jnz .Lenc_key_invalid_key_bits
|
|
|
|
// AES 128: 10 rounds in encryption key schedule
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov $10, %ROUNDS32
|
|
movl %ROUNDS32, 240(%AESKEY) // key.rounds = 10
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
aeskeygenassist $0x1, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128
|
|
aeskeygenassist $0x2, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128
|
|
aeskeygenassist $0x4, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128
|
|
aeskeygenassist $0x8, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128
|
|
aeskeygenassist $0x10, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128
|
|
aeskeygenassist $0x20, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128
|
|
aeskeygenassist $0x40, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128
|
|
aeskeygenassist $0x80, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128
|
|
aeskeygenassist $0x1b, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128
|
|
aeskeygenassist $0x36, %xmm0, %xmm1 // expand the key
|
|
call _key_expansion_128
|
|
|
|
SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
|
|
#ifdef OPENSSL_INTERFACE
|
|
xor %rax, %rax // return 0 (OK)
|
|
#else /* OpenSolaris Interface */
|
|
mov $10, %rax // return # rounds = 10
|
|
#endif
|
|
ret
|
|
|
|
.Lenc_key_invalid_param:
|
|
#ifdef OPENSSL_INTERFACE
|
|
SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
|
|
mov $-1, %rax // user key or AES key pointer is NULL
|
|
ret
|
|
#else
|
|
/* FALLTHROUGH */
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
.Lenc_key_invalid_key_bits:
|
|
SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov $-2, %rax // keysize is invalid
|
|
#else /* Open Solaris Interface */
|
|
xor %rax, %rax // a key pointer is NULL or invalid keysize
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
ret
|
|
SET_SIZE(rijndael_key_setup_enc_intel)
|
|
|
|
|
|
/*
|
|
* rijndael_key_setup_dec_intel()
|
|
* Expand the cipher key into the decryption key schedule.
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* OpenSolaris interface:
|
|
* int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
|
|
* uint64_t keyBits);
|
|
* Return value is 0 on error, number of rounds on success.
|
|
* P1->P2, P2->P3, P3->P1
|
|
*
|
|
* Original Intel OpenSSL interface:
|
|
* int intel_AES_set_decrypt_key(const unsigned char *userKey,
|
|
* const int bits, AES_KEY *key);
|
|
* Return value is non-zero on error, 0 on success.
|
|
*/
|
|
ENTRY_NP(rijndael_key_setup_dec_intel)
|
|
// Generate round keys used for encryption
|
|
call rijndael_key_setup_enc_intel_local
|
|
test %rax, %rax
|
|
#ifdef OPENSSL_INTERFACE
|
|
jnz .Ldec_key_exit // Failed if returned non-0
|
|
#else /* OpenSolaris Interface */
|
|
jz .Ldec_key_exit // Failed if returned 0
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
|
|
|
|
/*
|
|
* Convert round keys used for encryption
|
|
* to a form usable for decryption
|
|
*/
|
|
#ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */
|
|
mov %rax, %ROUNDS64 // set # rounds (10, 12, or 14)
|
|
// (already set for OpenSSL)
|
|
#endif
|
|
|
|
lea 0x10(%AESKEY), %rcx // key addr
|
|
shl $4, %ROUNDS32
|
|
add %AESKEY, %ROUNDS64
|
|
mov %ROUNDS64, %ENDAESKEY
|
|
|
|
.align 4
|
|
.Ldec_key_reorder_loop:
|
|
movaps (%AESKEY), %xmm0
|
|
movaps (%ROUNDS64), %xmm1
|
|
movaps %xmm0, (%ROUNDS64)
|
|
movaps %xmm1, (%AESKEY)
|
|
lea 0x10(%AESKEY), %AESKEY
|
|
lea -0x10(%ROUNDS64), %ROUNDS64
|
|
cmp %AESKEY, %ROUNDS64
|
|
ja .Ldec_key_reorder_loop
|
|
|
|
.align 4
|
|
.Ldec_key_inv_loop:
|
|
movaps (%rcx), %xmm0
|
|
// Convert an encryption round key to a form usable for decryption
|
|
// with the "AES Inverse Mix Columns" instruction
|
|
aesimc %xmm0, %xmm1
|
|
movaps %xmm1, (%rcx)
|
|
lea 0x10(%rcx), %rcx
|
|
cmp %ENDAESKEY, %rcx
|
|
jnz .Ldec_key_inv_loop
|
|
|
|
SET_TS_OR_POP_XMM0_XMM1(%r10)
|
|
|
|
.Ldec_key_exit:
|
|
// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
|
|
// OpenSSL: rax = 0 for OK, or non-zero for error
|
|
ret
|
|
SET_SIZE(rijndael_key_setup_dec_intel)
|
|
|
|
|
|
/*
|
|
* aes_encrypt_intel()
|
|
* Encrypt a single block (in and out can overlap).
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* Temporary register usage:
|
|
* %xmm0 State
|
|
* %xmm1 Key
|
|
*
|
|
* Original OpenSolaris Interface:
|
|
* void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t pt[4], uint32_t ct[4])
|
|
*
|
|
* Original Intel OpenSSL Interface:
|
|
* void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key)
|
|
*/
|
|
|
|
#ifdef OPENSSL_INTERFACE
|
|
#define aes_encrypt_intel intel_AES_encrypt
|
|
#define aes_decrypt_intel intel_AES_decrypt
|
|
|
|
#define INP rdi /* P1, 64 bits */
|
|
#define OUTP rsi /* P2, 64 bits */
|
|
#define KEYP rdx /* P3, 64 bits */
|
|
|
|
/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */
|
|
#define NROUNDS32 ecx /* temporary, 32 bits */
|
|
#define NROUNDS cl /* temporary, 8 bits */
|
|
|
|
#else /* OpenSolaris Interface */
|
|
#define KEYP rdi /* P1, 64 bits */
|
|
#define NROUNDS esi /* P2, 32 bits */
|
|
#define INP rdx /* P3, 64 bits */
|
|
#define OUTP rcx /* P4, 64 bits */
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
#define STATE xmm0 /* temporary, 128 bits */
|
|
#define KEY xmm1 /* temporary, 128 bits */
|
|
|
|
ENTRY_NP(aes_encrypt_intel)
|
|
CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
|
|
|
|
movups (%INP), %STATE // input
|
|
movaps (%KEYP), %KEY // key
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov 240(%KEYP), %NROUNDS32 // round count
|
|
#else /* OpenSolaris Interface */
|
|
/* Round count is already present as P2 in %rsi/%esi */
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
pxor %KEY, %STATE // round 0
|
|
lea 0x30(%KEYP), %KEYP
|
|
cmp $12, %NROUNDS
|
|
jb .Lenc128
|
|
lea 0x20(%KEYP), %KEYP
|
|
je .Lenc192
|
|
|
|
// AES 256
|
|
lea 0x20(%KEYP), %KEYP
|
|
movaps -0x60(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movaps -0x50(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
|
|
.align 4
|
|
.Lenc192:
|
|
// AES 192 and 256
|
|
movaps -0x40(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movaps -0x30(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
|
|
.align 4
|
|
.Lenc128:
|
|
// AES 128, 192, and 256
|
|
movaps -0x20(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movaps -0x10(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movaps (%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movaps 0x10(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movaps 0x20(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movaps 0x30(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movaps 0x40(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movaps 0x50(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movaps 0x60(%KEYP), %KEY
|
|
aesenc %KEY, %STATE
|
|
movaps 0x70(%KEYP), %KEY
|
|
aesenclast %KEY, %STATE // last round
|
|
movups %STATE, (%OUTP) // output
|
|
|
|
SET_TS_OR_POP_XMM0_XMM1(%r10)
|
|
ret
|
|
SET_SIZE(aes_encrypt_intel)
|
|
|
|
|
|
/*
|
|
* aes_decrypt_intel()
|
|
* Decrypt a single block (in and out can overlap).
|
|
*
|
|
* For kernel code, caller is responsible for ensuring kpreempt_disable()
|
|
* has been called. This is because %xmm registers are not saved/restored.
|
|
* Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
|
|
* on entry. Otherwise, if TS is not set, save and restore %xmm registers
|
|
* on the stack.
|
|
*
|
|
* Temporary register usage:
|
|
* %xmm0 State
|
|
* %xmm1 Key
|
|
*
|
|
* Original OpenSolaris Interface:
|
|
* void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
|
|
* const uint32_t pt[4], uint32_t ct[4])/
|
|
*
|
|
* Original Intel OpenSSL Interface:
|
|
* void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
|
|
* const AES_KEY *key);
|
|
*/
|
|
ENTRY_NP(aes_decrypt_intel)
|
|
CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
|
|
|
|
movups (%INP), %STATE // input
|
|
movaps (%KEYP), %KEY // key
|
|
#ifdef OPENSSL_INTERFACE
|
|
mov 240(%KEYP), %NROUNDS32 // round count
|
|
#else /* OpenSolaris Interface */
|
|
/* Round count is already present as P2 in %rsi/%esi */
|
|
#endif /* OPENSSL_INTERFACE */
|
|
|
|
pxor %KEY, %STATE // round 0
|
|
lea 0x30(%KEYP), %KEYP
|
|
cmp $12, %NROUNDS
|
|
jb .Ldec128
|
|
lea 0x20(%KEYP), %KEYP
|
|
je .Ldec192
|
|
|
|
// AES 256
|
|
lea 0x20(%KEYP), %KEYP
|
|
movaps -0x60(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movaps -0x50(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
|
|
.align 4
|
|
.Ldec192:
|
|
// AES 192 and 256
|
|
movaps -0x40(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movaps -0x30(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
|
|
.align 4
|
|
.Ldec128:
|
|
// AES 128, 192, and 256
|
|
movaps -0x20(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movaps -0x10(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movaps (%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movaps 0x10(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movaps 0x20(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movaps 0x30(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movaps 0x40(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movaps 0x50(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movaps 0x60(%KEYP), %KEY
|
|
aesdec %KEY, %STATE
|
|
movaps 0x70(%KEYP), %KEY
|
|
aesdeclast %KEY, %STATE // last round
|
|
movups %STATE, (%OUTP) // output
|
|
|
|
SET_TS_OR_POP_XMM0_XMM1(%r10)
|
|
ret
|
|
SET_SIZE(aes_decrypt_intel)
|
|
|
|
#endif /* lint || __lint */
|
|
|
|
#ifdef __ELF__
|
|
.section .note.GNU-stack,"",%progbits
|
|
#endif
|