mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Sponsored-by: https://despairlabs.com/sponsor/ Signed-off-by: Rob Norris <robn@despairlabs.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
		
			
				
	
	
		
			750 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			750 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
// SPDX-License-Identifier: OpenSSL-standalone
 | 
						|
/*
 | 
						|
 * ====================================================================
 | 
						|
 * Written by Intel Corporation for the OpenSSL project to add support
 | 
						|
 * for Intel AES-NI instructions. Rights for redistribution and usage
 | 
						|
 * in source and binary forms are granted according to the OpenSSL
 | 
						|
 * license.
 | 
						|
 *
 | 
						|
 *   Author: Huang Ying <ying.huang at intel dot com>
 | 
						|
 *           Vinodh Gopal <vinodh.gopal at intel dot com>
 | 
						|
 *           Kahraman Akdemir
 | 
						|
 *
 | 
						|
 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
 | 
						|
 * instructions that are going to be introduced in the next generation
 | 
						|
 * of Intel processor, as of 2009. These instructions enable fast and
 | 
						|
 * secure data encryption and decryption, using the Advanced Encryption
 | 
						|
 * Standard (AES), defined by FIPS Publication number 197. The
 | 
						|
 * architecture introduces six instructions that offer full hardware
 | 
						|
 * support for AES. Four of them support high performance data
 | 
						|
 * encryption and decryption, and the other two instructions support
 | 
						|
 * the AES key expansion procedure.
 | 
						|
 * ====================================================================
 | 
						|
 */
 | 
						|
 | 
						|
/*
 | 
						|
 * ====================================================================
 | 
						|
 * Copyright (c) 1998-2008 The OpenSSL Project.  All rights reserved.
 | 
						|
 *
 | 
						|
 * Redistribution and use in source and binary forms, with or without
 | 
						|
 * modification, are permitted provided that the following conditions
 | 
						|
 * are met:
 | 
						|
 *
 | 
						|
 * 1. Redistributions of source code must retain the above copyright
 | 
						|
 *    notice, this list of conditions and the following disclaimer.
 | 
						|
 *
 | 
						|
 * 2. Redistributions in binary form must reproduce the above copyright
 | 
						|
 *    notice, this list of conditions and the following disclaimer in
 | 
						|
 *    the documentation and/or other materials provided with the
 | 
						|
 *    distribution.
 | 
						|
 *
 | 
						|
 * 3. All advertising materials mentioning features or use of this
 | 
						|
 *    software must display the following acknowledgment:
 | 
						|
 *    "This product includes software developed by the OpenSSL Project
 | 
						|
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 | 
						|
 *
 | 
						|
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 | 
						|
 *    endorse or promote products derived from this software without
 | 
						|
 *    prior written permission. For written permission, please contact
 | 
						|
 *    openssl-core@openssl.org.
 | 
						|
 *
 | 
						|
 * 5. Products derived from this software may not be called "OpenSSL"
 | 
						|
 *    nor may "OpenSSL" appear in their names without prior written
 | 
						|
 *    permission of the OpenSSL Project.
 | 
						|
 *
 | 
						|
 * 6. Redistributions of any form whatsoever must retain the following
 | 
						|
 *    acknowledgment:
 | 
						|
 *    "This product includes software developed by the OpenSSL Project
 | 
						|
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 | 
						|
 *
 | 
						|
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 | 
						|
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
						|
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
						|
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 | 
						|
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 | 
						|
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | 
						|
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 | 
						|
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 | 
						|
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 | 
						|
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 | 
						|
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 | 
						|
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
						|
 * ====================================================================
 | 
						|
 */
 | 
						|
 | 
						|
/*
 | 
						|
 * ====================================================================
 | 
						|
 * OpenSolaris OS modifications
 | 
						|
 *
 | 
						|
 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
 | 
						|
 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
 | 
						|
 * Huang Ying of Intel to the openssl-dev mailing list under the subject
 | 
						|
 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
 | 
						|
 *
 | 
						|
 * This OpenSolaris version has these major changes from the original source:
 | 
						|
 *
 | 
						|
 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
 | 
						|
 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
 | 
						|
 * definitions for lint.
 | 
						|
 *
 | 
						|
 * 2. Formatted code, added comments, and added #includes and #defines.
 | 
						|
 *
 | 
						|
 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
 | 
						|
 * calling kpreempt_disable() and kpreempt_enable().
 | 
						|
 * If the TS bit is not set, Save and restore %xmm registers at the beginning
 | 
						|
 * and end of function calls (%xmm* registers are not saved and restored
 | 
						|
 * during kernel thread preemption).
 | 
						|
 *
 | 
						|
 * 4. Renamed functions, reordered parameters, and changed return value
 | 
						|
 * to match OpenSolaris:
 | 
						|
 *
 | 
						|
 * OpenSSL interface:
 | 
						|
 *	int intel_AES_set_encrypt_key(const unsigned char *userKey,
 | 
						|
 *		const int bits, AES_KEY *key);
 | 
						|
 *	int intel_AES_set_decrypt_key(const unsigned char *userKey,
 | 
						|
 *		const int bits, AES_KEY *key);
 | 
						|
 *	Return values for above are non-zero on error, 0 on success.
 | 
						|
 *
 | 
						|
 *	void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 | 
						|
 *		const AES_KEY *key);
 | 
						|
 *	void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 | 
						|
 *		const AES_KEY *key);
 | 
						|
 *	typedef struct aes_key_st {
 | 
						|
 *		unsigned int	rd_key[4 *(AES_MAXNR + 1)];
 | 
						|
 *		int		rounds;
 | 
						|
 *		unsigned int	pad[3];
 | 
						|
 *	} AES_KEY;
 | 
						|
 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
 | 
						|
 * (ks32) instead of 64-bit (ks64)).
 | 
						|
 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
 | 
						|
 *
 | 
						|
 * OpenSolaris OS interface (#ifdefs removed for readability):
 | 
						|
 *	int rijndael_key_setup_dec_intel(uint32_t rk[],
 | 
						|
 *		const uint32_t cipherKey[], uint64_t keyBits);
 | 
						|
 *	int rijndael_key_setup_enc_intel(uint32_t rk[],
 | 
						|
 *		const uint32_t cipherKey[], uint64_t keyBits);
 | 
						|
 *	Return values for above are 0 on error, number of rounds on success.
 | 
						|
 *
 | 
						|
 *	void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 | 
						|
 *		const uint32_t pt[4], uint32_t ct[4]);
 | 
						|
 *	void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 | 
						|
 *		const uint32_t pt[4], uint32_t ct[4]);
 | 
						|
 *	typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
 | 
						|
 *		 uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
 | 
						|
 *
 | 
						|
 *	typedef union {
 | 
						|
 *		uint32_t	ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 | 
						|
 *	} aes_ks_t;
 | 
						|
 *	typedef struct aes_key {
 | 
						|
 *		aes_ks_t	encr_ks, decr_ks;
 | 
						|
 *		long double	align128;
 | 
						|
 *		int		flags, nr, type;
 | 
						|
 *	} aes_key_t;
 | 
						|
 *
 | 
						|
 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
 | 
						|
 * ct is crypto text, and MAX_AES_NR is 14.
 | 
						|
 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
 | 
						|
 *
 | 
						|
 * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
 | 
						|
 *
 | 
						|
 * ====================================================================
 | 
						|
 */
 | 
						|
 | 
						|
 | 
						|
#if defined(lint) || defined(__lint)
 | 
						|
 | 
						|
#include <sys/types.h>
 | 
						|
 | 
						|
/*
 * lint-only stub for aes_encrypt_intel().  It performs no work; each
 * parameter is cast to void solely so that it counts as used.
 */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4])
{
	(void) rk;
	(void) Nr;
	(void) pt;
	(void) ct;
}
 | 
						|
/*
 * lint-only stub for aes_decrypt_intel().  It performs no work; each
 * parameter is cast to void solely so that it counts as used.
 */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4])
{
	(void) rk;
	(void) Nr;
	(void) ct;
	(void) pt;
}
 | 
						|
/*
 * lint-only stub for rijndael_key_setup_enc_intel().  It ignores its
 * arguments and always returns 0.
 */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	(void) rk;
	(void) cipherKey;
	(void) keyBits;
	return (0);
}
 | 
						|
/*
 * lint-only stub for rijndael_key_setup_dec_intel().  It ignores its
 * arguments and always returns 0.
 */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	(void) rk;
	(void) cipherKey;
	(void) keyBits;
	return (0);
}
 | 
						|
 | 
						|
 | 
						|
#elif defined(HAVE_AES)	/* guard by instruction set */
 | 
						|
 | 
						|
#define _ASM
 | 
						|
#include <sys/asm_linkage.h>
 | 
						|
 | 
						|
/*
 | 
						|
 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
 | 
						|
 * _key_expansion_256a(), _key_expansion_256b()
 | 
						|
 *
 | 
						|
 * Helper functions called by rijndael_key_setup_enc_intel().
 | 
						|
 * Also used indirectly by rijndael_key_setup_dec_intel().
 | 
						|
 *
 | 
						|
 * Input:
 | 
						|
 * %xmm0	User-provided cipher key
 | 
						|
 * %xmm1	Round constant
 | 
						|
 * Output:
 | 
						|
 * (%rcx)	AES key
 | 
						|
 */
 | 
						|
 | 
						|
/*
 * _key_expansion_128() / _key_expansion_256a()
 * One key-schedule step that updates %xmm0 in place.  Both names (and both
 * *_local labels) refer to the same code.
 *
 * In:    %xmm0  key words to update (the user key on the first call)
 *        %xmm1  aeskeygenassist result; only dword 3 is used
 *        %xmm4  dword 0 must be zero (rijndael_key_setup_enc_intel clears
 *               %xmm4 before the first call)
 *        %rcx   address at which to store the new round key
 * Out:   %xmm0  new round key, also written to the incoming (%rcx)
 *        %rcx   advanced by 0x10
 * Clobb: %xmm1, %xmm4 (dword 0 is still zero on return), flags
 */
ENTRY_NP2(_key_expansion_128, _key_expansion_256a)
_key_expansion_128_local:
_key_expansion_256a_local:
	pshufd	$0b11111111, %xmm1, %xmm1	// broadcast dword 3 of %xmm1
	// The two shufps/pxor pairs turn %xmm0 = (w0, w1, w2, w3) into the
	// running XOR (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3), using the zero in
	// dword 0 of %xmm4 as the shifted-in value.
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0			// fold in the keygenassist word
	movups	%xmm0, (%rcx)			// emit the round key
	add	$0x10, %rcx			// next round-key slot
	RET
	nop
SET_SIZE(_key_expansion_128)
SET_SIZE(_key_expansion_256a)
 | 
						|
 | 
						|
 | 
						|
/*
 * _key_expansion_192a()
 * AES-192 key-schedule step that emits 32 bytes: the two low dwords of the
 * incoming %xmm2 and the four dwords of the updated %xmm0, followed by the
 * two low dwords of the updated %xmm2.
 *
 * In:    %xmm0  key words to update
 *        %xmm1  aeskeygenassist result; only dword 1 is used
 *        %xmm2  remaining key words (only the low two dwords are stored)
 *        %xmm4  dword 0 must be zero
 *        %rcx   output address
 * Out:   %xmm0, %xmm2  updated in place
 *        (%rcx), 0x10(%rcx)  written; %rcx advanced by 0x20
 * Clobb: %xmm1, %xmm3, %xmm4, %xmm5, %xmm6, flags
 */
ENTRY_NP(_key_expansion_192a)
_key_expansion_192a_local:
	pshufd	$0b01010101, %xmm1, %xmm1	// broadcast dword 1 of %xmm1
	// Running XOR across the four dwords of %xmm0 (needs %xmm4[0] == 0).
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0			// fold in the keygenassist word

	movups	%xmm2, %xmm5
	movups	%xmm2, %xmm6			// keep the pre-update %xmm2
	pslldq	$4, %xmm5			// %xmm5 = %xmm2 shifted up one dword
	pshufd	$0b11111111, %xmm0, %xmm3	// broadcast dword 3 of new %xmm0
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movups	%xmm0, %xmm1
	shufps	$0b01000100, %xmm0, %xmm6	// old %xmm2[0..1], new %xmm0[0..1]
	movups	%xmm6, (%rcx)
	shufps	$0b01001110, %xmm2, %xmm1	// new %xmm0[2..3], new %xmm2[0..1]
	movups	%xmm1, 0x10(%rcx)
	add	$0x20, %rcx			// two round-key slots consumed
	RET
SET_SIZE(_key_expansion_192a)
 | 
						|
 | 
						|
 | 
						|
/*
 * _key_expansion_192b()
 * AES-192 key-schedule step that emits 16 bytes: the updated %xmm0.
 * %xmm2 is updated as well but is not stored by this step.
 *
 * In:    %xmm0  key words to update
 *        %xmm1  aeskeygenassist result; only dword 1 is used
 *        %xmm2  remaining key words
 *        %xmm4  dword 0 must be zero
 *        %rcx   output address
 * Out:   %xmm0, %xmm2  updated in place
 *        (%rcx)  written; %rcx advanced by 0x10
 * Clobb: %xmm1, %xmm3, %xmm4, %xmm5, flags
 */
ENTRY_NP(_key_expansion_192b)
_key_expansion_192b_local:
	pshufd	$0b01010101, %xmm1, %xmm1	// broadcast dword 1 of %xmm1
	// Running XOR across the four dwords of %xmm0 (needs %xmm4[0] == 0).
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0			// fold in the keygenassist word

	movups	%xmm2, %xmm5
	pslldq	$4, %xmm5			// %xmm5 = %xmm2 shifted up one dword
	pshufd	$0b11111111, %xmm0, %xmm3	// broadcast dword 3 of new %xmm0
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movups	%xmm0, (%rcx)			// emit the round key
	add	$0x10, %rcx			// next round-key slot
	RET
SET_SIZE(_key_expansion_192b)
 | 
						|
 | 
						|
 | 
						|
/*
 * _key_expansion_256b()
 * AES-256 key-schedule step for the second key half; same shape as
 * _key_expansion_256a() but it works on %xmm2 and uses dword 2 of the
 * aeskeygenassist result.
 *
 * In:    %xmm1  aeskeygenassist result; only dword 2 is used
 *        %xmm2  key words to update
 *        %xmm4  dword 0 must be zero
 *        %rcx   output address
 * Out:   %xmm2  new round key, also written to the incoming (%rcx)
 *        %rcx   advanced by 0x10
 * Clobb: %xmm1, %xmm4 (dword 0 is still zero on return), flags
 */
ENTRY_NP(_key_expansion_256b)
_key_expansion_256b_local:
	pshufd	$0b10101010, %xmm1, %xmm1	// broadcast dword 2 of %xmm1
	// Running XOR across the four dwords of %xmm2 (needs %xmm4[0] == 0).
	shufps	$0b00010000, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0b10001100, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	pxor	%xmm1, %xmm2			// fold in the keygenassist word
	movups	%xmm2, (%rcx)			// emit the round key
	add	$0x10, %rcx			// next round-key slot
	RET
SET_SIZE(_key_expansion_256b)
 | 
						|
 | 
						|
 | 
						|
/*
 | 
						|
 * rijndael_key_setup_enc_intel()
 | 
						|
 * Expand the cipher key into the encryption key schedule.
 | 
						|
 *
 | 
						|
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 | 
						|
 * has been called.  This is because %xmm registers are not saved/restored.
 | 
						|
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 | 
						|
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 | 
						|
 * on the stack.
 | 
						|
 *
 | 
						|
 * OpenSolaris interface:
 | 
						|
 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
 | 
						|
 *	uint64_t keyBits);
 | 
						|
 * Return value is 0 on error, number of rounds on success.
 | 
						|
 *
 | 
						|
 * Original Intel OpenSSL interface:
 | 
						|
 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
 | 
						|
 *	const int bits, AES_KEY *key);
 | 
						|
 * Return value is non-zero on error, 0 on success.
 | 
						|
 */
 | 
						|
 | 
						|
/*
 * Register aliases for the key-setup routines.  The two interfaces pass the
 * same three arguments in a different order, so the argument registers are
 * named symbolically and the code below is written once.
 */
#ifdef	OPENSSL_INTERFACE
/* Export the routines under their OpenSSL names. */
#define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
#define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key

#define	USERCIPHERKEY		rdi	/* P1, 64 bits */
#define	KEYSIZE32		esi	/* P2, 32 bits */
#define	KEYSIZE64		rsi	/* P2, 64 bits */
#define	AESKEY			rdx	/* P3, 64 bits */

#else	/* OpenSolaris Interface */
#define	AESKEY			rdi	/* P1, 64 bits */
#define	USERCIPHERKEY		rsi	/* P2, 64 bits */
#define	KEYSIZE32		edx	/* P3, 32 bits */
#define	KEYSIZE64		rdx	/* P3, 64 bits */
#endif	/* OPENSSL_INTERFACE */

/*
 * Temporaries that reuse argument registers: writing ROUNDS32/ROUNDS64
 * destroys the key size, and writing ENDAESKEY destroys the user-key pointer.
 */
#define	ROUNDS32		KEYSIZE32	/* temp */
#define	ROUNDS64		KEYSIZE64	/* temp */
#define	ENDAESKEY		USERCIPHERKEY	/* temp */
 | 
						|
 | 
						|
/*
 * rijndael_key_setup_enc_intel
 * Expand a 128-, 192- or 256-bit user key into the encryption key schedule.
 *
 * In:    %AESKEY         destination key schedule
 *        %USERCIPHERKEY  user key
 *        %KEYSIZE32      key size in bits (128, 192 or 256)
 * Out:   OpenSolaris interface: %rax = 10, 12 or 14 (number of rounds),
 *        or 0 if either pointer is NULL or the key size is unsupported.
 *        OpenSSL interface: %rax = 0 on success, -1 for a NULL pointer,
 *        -2 for an unsupported key size; the round count is also stored at
 *        offset 240 of the key schedule.
 * Writes (rounds + 1) * 16 bytes starting at (%AESKEY).
 * Clobb: %rcx, %xmm0-%xmm6, flags; under OPENSSL_INTERFACE also %ROUNDS32.
 *        %AESKEY and %USERCIPHERKEY are not modified.  No %xmm register is
 *        saved or restored here.
 *
 * The *_local label is the target used by rijndael_key_setup_dec_intel().
 */
ENTRY_NP(rijndael_key_setup_enc_intel)
rijndael_key_setup_enc_intel_local:
	FRAME_BEGIN
	// NULL pointer sanity check
	test	%USERCIPHERKEY, %USERCIPHERKEY
	jz	.Lenc_key_invalid_param
	test	%AESKEY, %AESKEY
	jz	.Lenc_key_invalid_param

	movups	(%USERCIPHERKEY), %xmm0	// user key (first 16 bytes)
	movups	%xmm0, (%AESKEY)	// round key 0 is the user key itself
	lea	0x10(%AESKEY), %rcx	// %rcx = next round-key slot
	pxor	%xmm4, %xmm4		// xmm4 is assumed 0 in _key_expansion_x

	cmp	$256, %KEYSIZE32
	jnz	.Lenc_key192

	// AES 256: 14 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$14, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 14
#endif	/* OPENSSL_INTERFACE */

	movups	0x10(%USERCIPHERKEY), %xmm2	// other user key (2nd 16 bytes)
	movups	%xmm2, (%rcx)			// round key 1
	add	$0x10, %rcx

	// 13 further round keys: the 256a step updates %xmm0 from %xmm2 and
	// the 256b step updates %xmm2 from %xmm0, alternating.
	aeskeygenassist $0x1, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x2, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x4, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x8, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x10, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x20, %xmm2, %xmm1
	call	_key_expansion_256a_local
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x40, %xmm2, %xmm1
	call	_key_expansion_256a_local

#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			// return 0 (OK)
#else	/* Open Solaris Interface */
	mov	$14, %rax			// return # rounds = 14
#endif
	FRAME_END
	RET

.balign 4
.Lenc_key192:
	cmp	$192, %KEYSIZE32
	jnz	.Lenc_key128

	// AES 192: 12 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$12, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)	// key.rounds = 12
#endif	/* OPENSSL_INTERFACE */

	movq	0x10(%USERCIPHERKEY), %xmm2	// other user key (last 8 bytes)
	// Four 192a/192b pairs; each pair emits 0x20 + 0x10 bytes.
	aeskeygenassist $0x1, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x2, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x4, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x8, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x10, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x20, %xmm2, %xmm1
	call	_key_expansion_192b_local
	aeskeygenassist $0x40, %xmm2, %xmm1
	call	_key_expansion_192a_local
	aeskeygenassist $0x80, %xmm2, %xmm1
	call	_key_expansion_192b_local

#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$12, %rax			// return # rounds = 12
#endif
	FRAME_END
	RET

.balign 4
.Lenc_key128:
	cmp	$128, %KEYSIZE32
	jnz	.Lenc_key_invalid_key_bits

	// AES 128: 10 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$10, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		// key.rounds = 10
#endif	/* OPENSSL_INTERFACE */

	// Ten steps, one round key each.
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x40, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x80, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x1b, %xmm0, %xmm1
	call	_key_expansion_128_local
	aeskeygenassist $0x36, %xmm0, %xmm1
	call	_key_expansion_128_local

#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$10, %rax			// return # rounds = 10
#endif
	FRAME_END
	RET

.Lenc_key_invalid_param:
#ifdef	OPENSSL_INTERFACE
	mov	$-1, %rax	// user key or AES key pointer is NULL
	FRAME_END
	RET
#else
	/* FALLTHROUGH: both error cases return 0 in this interface */
#endif	/* OPENSSL_INTERFACE */

.Lenc_key_invalid_key_bits:
#ifdef	OPENSSL_INTERFACE
	mov	$-2, %rax	// keysize is invalid
#else	/* Open Solaris Interface */
	xor	%rax, %rax	// a key pointer is NULL or invalid keysize
#endif	/* OPENSSL_INTERFACE */
	FRAME_END
	RET
	SET_SIZE(rijndael_key_setup_enc_intel)
 | 
						|
 | 
						|
 | 
						|
/*
 | 
						|
 * rijndael_key_setup_dec_intel()
 | 
						|
 * Expand the cipher key into the decryption key schedule.
 | 
						|
 *
 | 
						|
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 | 
						|
 * has been called.  This is because %xmm registers are not saved/restored.
 | 
						|
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 | 
						|
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 | 
						|
 * on the stack.
 | 
						|
 *
 | 
						|
 * OpenSolaris interface:
 | 
						|
 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
 | 
						|
 *	uint64_t keyBits);
 | 
						|
 * Return value is 0 on error, number of rounds on success.
 | 
						|
 * P1->P2, P2->P3, P3->P1
 | 
						|
 *
 | 
						|
 * Original Intel OpenSSL interface:
 | 
						|
 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
 | 
						|
 *	const int bits, AES_KEY *key);
 | 
						|
 * Return value is non-zero on error, 0 on success.
 | 
						|
 */
 | 
						|
 | 
						|
/*
 * rijndael_key_setup_dec_intel
 * Build the decryption key schedule in three stages:
 *   1. generate the encryption schedule with rijndael_key_setup_enc_intel;
 *   2. reverse the order of the round keys in place;
 *   3. apply aesimc to every round key except the first and the last.
 *
 * Arguments and return value are the same as for
 * rijndael_key_setup_enc_intel; an error from stage 1 is returned unchanged
 * and stages 2 and 3 are skipped.
 * Clobb: everything stage 1 clobbers, plus %AESKEY, %ROUNDS64 and
 *        %ENDAESKEY (all three are argument registers).
 */
ENTRY_NP(rijndael_key_setup_dec_intel)
	FRAME_BEGIN
	// Generate round keys used for encryption
	call	rijndael_key_setup_enc_intel_local
	test	%rax, %rax
#ifdef	OPENSSL_INTERFACE
	jnz	.Ldec_key_exit	// Failed if returned non-0
#else	/* OpenSolaris Interface */
	jz	.Ldec_key_exit	// Failed if returned 0
#endif	/* OPENSSL_INTERFACE */

	/*
	 * Convert round keys used for encryption
	 * to a form usable for decryption
	 */
#ifndef	OPENSSL_INTERFACE		/* OpenSolaris Interface */
	mov	%rax, %ROUNDS64		// set # rounds (10, 12, or 14)
					// (already set for OpenSSL)
#endif

	lea	0x10(%AESKEY), %rcx	// %rcx = &key[1], start of the aesimc pass
	shl	$4, %ROUNDS32		// rounds * 16 = byte offset of last key
	add	%AESKEY, %ROUNDS64	// %ROUNDS64 = &key[rounds]
	mov	%ROUNDS64, %ENDAESKEY	// remember it as the aesimc end marker

	// Swap round keys pairwise from both ends until the pointers meet.
.balign 4
.Ldec_key_reorder_loop:
	movups	(%AESKEY), %xmm0
	movups	(%ROUNDS64), %xmm1
	movups	%xmm0, (%ROUNDS64)
	movups	%xmm1, (%AESKEY)
	lea	0x10(%AESKEY), %AESKEY
	lea	-0x10(%ROUNDS64), %ROUNDS64
	cmp	%AESKEY, %ROUNDS64
	ja	.Ldec_key_reorder_loop	// unsigned: continue while high > low

	// Transform key[1] .. key[rounds - 1]; key[0] and key[rounds] stay.
.balign 4
.Ldec_key_inv_loop:
	movups	(%rcx), %xmm0
	// Convert an encryption round key to a form usable for decryption
	// with the "AES Inverse Mix Columns" instruction
	aesimc	%xmm0, %xmm1
	movups	%xmm1, (%rcx)
	lea	0x10(%rcx), %rcx
	cmp	%ENDAESKEY, %rcx
	jnz	.Ldec_key_inv_loop

.Ldec_key_exit:
	// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	// OpenSSL: rax = 0 for OK, or non-zero for error
	FRAME_END
	RET
	SET_SIZE(rijndael_key_setup_dec_intel)
 | 
						|
 | 
						|
 | 
						|
/*
 | 
						|
 * aes_encrypt_intel()
 | 
						|
 * Encrypt a single block (in and out can overlap).
 | 
						|
 *
 | 
						|
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 | 
						|
 * has been called.  This is because %xmm registers are not saved/restored.
 | 
						|
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 | 
						|
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 | 
						|
 * on the stack.
 | 
						|
 *
 | 
						|
 * Temporary register usage:
 | 
						|
 * %xmm0	State
 | 
						|
 * %xmm1	Key
 | 
						|
 *
 | 
						|
 * Original OpenSolaris Interface:
 | 
						|
 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 | 
						|
 *	const uint32_t pt[4], uint32_t ct[4])
 | 
						|
 *
 | 
						|
 * Original Intel OpenSSL Interface:
 | 
						|
 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 | 
						|
 *	const AES_KEY *key)
 | 
						|
 */
 | 
						|
 | 
						|
/*
 * Register aliases for aes_encrypt_intel() and aes_decrypt_intel().  The
 * argument registers are named symbolically so that one body serves both
 * interfaces.
 */
#ifdef	OPENSSL_INTERFACE
/* Export the routines under their OpenSSL names. */
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* P1, 64 bits */
#define	OUTP		rsi	/* P2, 64 bits */
#define	KEYP		rdx	/* P3, 64 bits */

/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx:  */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary,  8 bits */

#else	/* OpenSolaris Interface */
#define	KEYP		rdi	/* P1, 64 bits */
#define	NROUNDS		esi	/* P2, 32 bits */
#define	INP		rdx	/* P3, 64 bits */
#define	OUTP		rcx	/* P4, 64 bits */
#endif	/* OPENSSL_INTERFACE */

/* Scratch vector registers shared by both routines. */
#define	STATE		xmm0	/* temporary, 128 bits */
#define	KEY		xmm1	/* temporary, 128 bits */

 | 
						|
 | 
						|
 | 
						|
/*
 * aes_encrypt_intel
 * Encrypt one 16-byte block.
 *
 * In:    %KEYP     encryption key schedule
 *        %NROUNDS  round count (OpenSolaris interface); under
 *                  OPENSSL_INTERFACE it is loaded from 240(%KEYP)
 *        %INP      16-byte input block
 *        %OUTP     16-byte output block
 * Out:   (%OUTP) written; nothing is returned in a register
 * Clobb: %KEYP, %STATE, %KEY, flags (and %NROUNDS32 under OPENSSL_INTERFACE)
 *
 * The input is fully loaded before the output is stored, and all accesses
 * use movups, so no alignment is required.  %KEYP is biased so that the
 * final ten round keys sit at offsets -0x20 .. 0x70 for every key size;
 * the longer key sizes run their extra leading rounds first and then fall
 * through into the shared tail.
 */
ENTRY_NP(aes_encrypt_intel)

	movups	(%INP), %STATE			// input
	movups	(%KEYP), %KEY			// key
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			// round 0
	lea	0x30(%KEYP), %KEYP		// bias for a 10-round schedule
	cmp	$12, %NROUNDS
	jb	.Lenc128
	lea	0x20(%KEYP), %KEYP		// lea keeps the cmp flags intact
	je	.Lenc192

	// AES 256
	lea	0x20(%KEYP), %KEYP
	movups	-0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE

.balign 4
.Lenc192:
	// AES 192 and 256
	movups	-0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE

.balign 4
.Lenc128:
	// AES 128, 192, and 256
	movups	-0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesenclast	 %KEY, %STATE		// last round
	movups	%STATE, (%OUTP)			// output

	RET
	SET_SIZE(aes_encrypt_intel)
 | 
						|
 | 
						|
 | 
						|
/*
 | 
						|
 * aes_decrypt_intel()
 | 
						|
 * Decrypt a single block (in and out can overlap).
 | 
						|
 *
 | 
						|
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 | 
						|
 * has been called.  This is because %xmm registers are not saved/restored.
 | 
						|
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 | 
						|
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 | 
						|
 * on the stack.
 | 
						|
 *
 | 
						|
 * Temporary register usage:
 | 
						|
 * %xmm0	State
 | 
						|
 * %xmm1	Key
 | 
						|
 *
 | 
						|
 * Original OpenSolaris Interface:
 | 
						|
 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 | 
						|
 *	const uint32_t pt[4], uint32_t ct[4])
 | 
						|
 *
 | 
						|
 * Original Intel OpenSSL Interface:
 | 
						|
 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 | 
						|
 *	const AES_KEY *key);
 | 
						|
 */
 | 
						|
/*
 * aes_decrypt_intel
 * Decrypt one 16-byte block.
 *
 * In:    %KEYP     decryption key schedule (presumably the form produced
 *                  by rijndael_key_setup_dec_intel -- confirm with callers)
 *        %NROUNDS  round count (OpenSolaris interface); under
 *                  OPENSSL_INTERFACE it is loaded from 240(%KEYP)
 *        %INP      16-byte input block
 *        %OUTP     16-byte output block
 * Out:   (%OUTP) written; nothing is returned in a register
 * Clobb: %KEYP, %STATE, %KEY, flags (and %NROUNDS32 under OPENSSL_INTERFACE)
 *
 * Same structure as aes_encrypt_intel(), with aesdec/aesdeclast in place
 * of aesenc/aesenclast: %KEYP is biased so that the final ten round keys
 * sit at offsets -0x20 .. 0x70 for every key size, and the longer key sizes
 * fall through into the shared tail.
 */
ENTRY_NP(aes_decrypt_intel)

	movups	(%INP), %STATE			// input
	movups	(%KEYP), %KEY			// key
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			// round 0
	lea	0x30(%KEYP), %KEYP		// bias for a 10-round schedule
	cmp	$12, %NROUNDS
	jb	.Ldec128
	lea	0x20(%KEYP), %KEYP		// lea keeps the cmp flags intact
	je	.Ldec192

	// AES 256
	lea	0x20(%KEYP), %KEYP
	movups	-0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE

.balign 4
.Ldec192:
	// AES 192 and 256
	movups	-0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE

.balign 4
.Ldec128:
	// AES 128, 192, and 256
	movups	-0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesdeclast	%KEY, %STATE		// last round
	movups	%STATE, (%OUTP)			// output

	RET
	SET_SIZE(aes_decrypt_intel)
 | 
						|
 | 
						|
#endif	/* lint || __lint */
 | 
						|
 | 
						|
/*
 * On ELF targets, emit an empty .note.GNU-stack section.  By GNU toolchain
 * convention this marks the object as not needing an executable stack.
 */
#ifdef __ELF__
.section .note.GNU-stack,"",%progbits
#endif
 |