mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
Add support for selecting encryption backend
- Add two new module parameters to icp (icp_aes_impl, icp_gcm_impl) that control the crypto implementation. At the moment there is a choice between generic and aesni (on platforms that support it). - This enables support for AES-NI and PCLMULQDQ-NI on AMD Family 15h (bulldozer) and newer CPUs (zen). - Modify aes_key_t to track what implementation it was generated with as key schedules generated with various implementations are not necessarily interchangeable. Reviewed by: Gvozden Neskovic <neskovic@gmail.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Tom Caputi <tcaputi@datto.com> Reviewed-by: Richard Laager <rlaager@wiktel.com> Signed-off-by: Nathaniel R. Lewis <linux.robotdude@gmail.com> Closes #7102 Closes #7103
This commit is contained in:
committed by
Brian Behlendorf
parent
3d503a76e8
commit
010d12474c
+259
-1445
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_AES)
|
||||
|
||||
#include <linux/simd_x86.h>
|
||||
|
||||
/* These functions are used to execute AES-NI instructions: */
|
||||
extern int rijndael_key_setup_enc_intel(uint32_t rk[],
|
||||
const uint32_t cipherKey[], uint64_t keyBits);
|
||||
extern int rijndael_key_setup_dec_intel(uint32_t rk[],
|
||||
const uint32_t cipherKey[], uint64_t keyBits);
|
||||
extern void aes_encrypt_intel(const uint32_t rk[], int Nr,
|
||||
const uint32_t pt[4], uint32_t ct[4]);
|
||||
extern void aes_decrypt_intel(const uint32_t rk[], int Nr,
|
||||
const uint32_t ct[4], uint32_t pt[4]);
|
||||
|
||||
|
||||
#include <aes/aes_impl.h>
|
||||
|
||||
/*
|
||||
* Expand the 32-bit AES cipher key array into the encryption and decryption
|
||||
* key schedules.
|
||||
*
|
||||
* Parameters:
|
||||
* key AES key schedule to be initialized
|
||||
* keyarr32 User key
|
||||
* keyBits AES key size (128, 192, or 256 bits)
|
||||
*/
|
||||
static void
|
||||
aes_aesni_generate(aes_key_t *key, const uint32_t *keyarr32, int keybits)
|
||||
{
|
||||
kfpu_begin();
|
||||
key->nr = rijndael_key_setup_enc_intel(&(key->encr_ks.ks32[0]),
|
||||
keyarr32, keybits);
|
||||
key->nr = rijndael_key_setup_dec_intel(&(key->decr_ks.ks32[0]),
|
||||
keyarr32, keybits);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
/*
 * Encrypt one block of data with the AES-NI assembly routine.
 *
 * The block is handled as an array of four uint32_t values, so a copy for
 * alignment (and a byte-order reversal on little endian systems) might be
 * necessary on the input and output byte streams.
 * The size of the key schedule depends on the number of rounds
 * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
 *
 * Parameters:
 * rk	Key schedule, of aes_ks_t (60 32-bit integers)
 * Nr	Number of rounds
 * pt	Input block (plain text)
 * ct	Output block (crypto text).  Can overlap with pt
 *
 * The assembly routine runs between kfpu_begin() and kfpu_end().
 */
static void
aes_aesni_encrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4])
{
	kfpu_begin();
	aes_encrypt_intel(rk, Nr, pt, ct);
	kfpu_end();
}
|
||||
|
||||
/*
 * Decrypt one block of data with the AES-NI assembly routine.
 *
 * The block is handled as an array of four uint32_t values, so a copy for
 * alignment (and a byte-order reversal on little endian systems) might be
 * necessary on the input and output byte streams.
 * The size of the key schedule depends on the number of rounds
 * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
 *
 * Parameters:
 * rk	Key schedule, of aes_ks_t (60 32-bit integers)
 * Nr	Number of rounds
 * ct	Input block (crypto text)
 * pt	Output block (plain text).  Can overlap with ct
 *
 * The assembly routine runs between kfpu_begin() and kfpu_end().
 */
static void
aes_aesni_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4])
{
	kfpu_begin();
	aes_decrypt_intel(rk, Nr, ct, pt);
	kfpu_end();
}
|
||||
|
||||
static boolean_t
|
||||
aes_aesni_will_work(void)
|
||||
{
|
||||
return (zfs_aes_available());
|
||||
}
|
||||
|
||||
const aes_impl_ops_t aes_aesni_impl = {
|
||||
.generate = &aes_aesni_generate,
|
||||
.encrypt = &aes_aesni_encrypt,
|
||||
.decrypt = &aes_aesni_decrypt,
|
||||
.is_supported = &aes_aesni_will_work,
|
||||
.needs_byteswap = B_FALSE,
|
||||
.name = "aesni"
|
||||
};
|
||||
|
||||
#endif /* defined(__x86_64) && defined(HAVE_AES) */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#if defined(__x86_64)
|
||||
|
||||
#include <linux/simd_x86.h>
|
||||
|
||||
/* These functions are used to execute amd64 instructions for AMD or Intel: */
|
||||
extern int rijndael_key_setup_enc_amd64(uint32_t rk[],
|
||||
const uint32_t cipherKey[], int keyBits);
|
||||
extern int rijndael_key_setup_dec_amd64(uint32_t rk[],
|
||||
const uint32_t cipherKey[], int keyBits);
|
||||
extern void aes_encrypt_amd64(const uint32_t rk[], int Nr,
|
||||
const uint32_t pt[4], uint32_t ct[4]);
|
||||
extern void aes_decrypt_amd64(const uint32_t rk[], int Nr,
|
||||
const uint32_t ct[4], uint32_t pt[4]);
|
||||
|
||||
|
||||
#include <aes/aes_impl.h>
|
||||
|
||||
/*
|
||||
* Expand the 32-bit AES cipher key array into the encryption and decryption
|
||||
* key schedules.
|
||||
*
|
||||
* Parameters:
|
||||
* key AES key schedule to be initialized
|
||||
* keyarr32 User key
|
||||
* keyBits AES key size (128, 192, or 256 bits)
|
||||
*/
|
||||
static void
|
||||
aes_x86_64_generate(aes_key_t *key, const uint32_t *keyarr32, int keybits)
|
||||
{
|
||||
key->nr = rijndael_key_setup_enc_amd64(&(key->encr_ks.ks32[0]),
|
||||
keyarr32, keybits);
|
||||
key->nr = rijndael_key_setup_dec_amd64(&(key->decr_ks.ks32[0]),
|
||||
keyarr32, keybits);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
aes_x86_64_will_work(void)
|
||||
{
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
const aes_impl_ops_t aes_x86_64_impl = {
|
||||
.generate = &aes_x86_64_generate,
|
||||
.encrypt = &aes_encrypt_amd64,
|
||||
.decrypt = &aes_decrypt_amd64,
|
||||
.is_supported = &aes_x86_64_will_work,
|
||||
.needs_byteswap = B_FALSE,
|
||||
.name = "x86_64"
|
||||
};
|
||||
|
||||
#endif /* defined(__x86_64) */
|
||||
+210
-123
@@ -22,93 +22,19 @@
|
||||
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#if defined(_KERNEL) && defined(__amd64)
|
||||
#include <linux/simd_x86.h>
|
||||
|
||||
#define KPREEMPT_DISABLE kfpu_begin()
|
||||
#define KPREEMPT_ENABLE kfpu_end()
|
||||
|
||||
#else
|
||||
#define KPREEMPT_DISABLE
|
||||
#define KPREEMPT_ENABLE
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <modes/modes.h>
|
||||
#include <sys/crypto/common.h>
|
||||
#include <sys/crypto/icp.h>
|
||||
#include <sys/crypto/impl.h>
|
||||
#include <sys/byteorder.h>
|
||||
#include <modes/gcm_impl.h>
|
||||
|
||||
#ifdef __amd64
|
||||
|
||||
extern void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
|
||||
static int intel_pclmulqdq_instruction_present(void);
|
||||
#endif /* __amd64 */
|
||||
|
||||
struct aes_block {
|
||||
uint64_t a;
|
||||
uint64_t b;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* gcm_mul()
|
||||
* Perform a carry-less multiplication (that is, use XOR instead of the
|
||||
* multiply operator) on *x_in and *y and place the result in *res.
|
||||
*
|
||||
* Byte swap the input (*x_in and *y) and the output (*res).
|
||||
*
|
||||
* Note: x_in, y, and res all point to 16-byte numbers (an array of two
|
||||
* 64-bit integers).
|
||||
*/
|
||||
void
|
||||
gcm_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
|
||||
{
|
||||
#ifdef __amd64
|
||||
if (intel_pclmulqdq_instruction_present()) {
|
||||
KPREEMPT_DISABLE;
|
||||
gcm_mul_pclmulqdq(x_in, y, res);
|
||||
KPREEMPT_ENABLE;
|
||||
} else
|
||||
#endif /* __amd64 */
|
||||
{
|
||||
static const uint64_t R = 0xe100000000000000ULL;
|
||||
struct aes_block z = {0, 0};
|
||||
struct aes_block v;
|
||||
uint64_t x;
|
||||
int i, j;
|
||||
|
||||
v.a = ntohll(y[0]);
|
||||
v.b = ntohll(y[1]);
|
||||
|
||||
for (j = 0; j < 2; j++) {
|
||||
x = ntohll(x_in[j]);
|
||||
for (i = 0; i < 64; i++, x <<= 1) {
|
||||
if (x & 0x8000000000000000ULL) {
|
||||
z.a ^= v.a;
|
||||
z.b ^= v.b;
|
||||
}
|
||||
if (v.b & 1ULL) {
|
||||
v.b = (v.a << 63)|(v.b >> 1);
|
||||
v.a = (v.a >> 1) ^ R;
|
||||
} else {
|
||||
v.b = (v.a << 63)|(v.b >> 1);
|
||||
v.a = v.a >> 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
res[0] = htonll(z.a);
|
||||
res[1] = htonll(z.b);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define GHASH(c, d, t) \
|
||||
#define GHASH(c, d, t, o) \
|
||||
xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
|
||||
gcm_mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
|
||||
(o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
|
||||
(uint64_t *)(void *)(t));
|
||||
|
||||
|
||||
/*
|
||||
* Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
|
||||
* is done in another function.
|
||||
@@ -120,6 +46,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
|
||||
void (*copy_block)(uint8_t *, uint8_t *),
|
||||
void (*xor_block)(uint8_t *, uint8_t *))
|
||||
{
|
||||
gcm_impl_ops_t *gops;
|
||||
size_t remainder = length;
|
||||
size_t need = 0;
|
||||
uint8_t *datap = (uint8_t *)data;
|
||||
@@ -147,6 +74,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
|
||||
if (out != NULL)
|
||||
crypto_init_ptrs(out, &iov_or_mp, &offset);
|
||||
|
||||
gops = gcm_impl_get_ops();
|
||||
do {
|
||||
/* Unprocessed data from last call. */
|
||||
if (ctx->gcm_remainder_len > 0) {
|
||||
@@ -207,7 +135,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
|
||||
}
|
||||
|
||||
/* add ciphertext to the hash */
|
||||
GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
|
||||
GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);
|
||||
|
||||
/* Update pointer to next block of data to be processed. */
|
||||
if (ctx->gcm_remainder_len != 0) {
|
||||
@@ -240,6 +168,7 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
|
||||
void (*copy_block)(uint8_t *, uint8_t *),
|
||||
void (*xor_block)(uint8_t *, uint8_t *))
|
||||
{
|
||||
gcm_impl_ops_t *gops;
|
||||
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
|
||||
uint8_t *ghash, *macp = NULL;
|
||||
int i, rv;
|
||||
@@ -249,6 +178,7 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
|
||||
return (CRYPTO_DATA_LEN_RANGE);
|
||||
}
|
||||
|
||||
gops = gcm_impl_get_ops();
|
||||
ghash = (uint8_t *)ctx->gcm_ghash;
|
||||
|
||||
if (ctx->gcm_remainder_len > 0) {
|
||||
@@ -281,14 +211,14 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
|
||||
}
|
||||
|
||||
/* add ciphertext to the hash */
|
||||
GHASH(ctx, macp, ghash);
|
||||
GHASH(ctx, macp, ghash, gops);
|
||||
|
||||
ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
|
||||
}
|
||||
|
||||
ctx->gcm_len_a_len_c[1] =
|
||||
htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
|
||||
GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
|
||||
GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
|
||||
encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
|
||||
(uint8_t *)ctx->gcm_J0);
|
||||
xor_block((uint8_t *)ctx->gcm_J0, ghash);
|
||||
@@ -340,7 +270,7 @@ gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
|
||||
bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);
|
||||
|
||||
/* add ciphertext to the hash */
|
||||
GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
|
||||
GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());
|
||||
|
||||
/* decrypt remaining ciphertext */
|
||||
encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
|
||||
@@ -390,6 +320,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
|
||||
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
|
||||
void (*xor_block)(uint8_t *, uint8_t *))
|
||||
{
|
||||
gcm_impl_ops_t *gops;
|
||||
size_t pt_len;
|
||||
size_t remainder;
|
||||
uint8_t *ghash;
|
||||
@@ -401,6 +332,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
|
||||
|
||||
ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
|
||||
|
||||
gops = gcm_impl_get_ops();
|
||||
pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
|
||||
ghash = (uint8_t *)ctx->gcm_ghash;
|
||||
blockp = ctx->gcm_pt_buf;
|
||||
@@ -420,7 +352,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
|
||||
goto out;
|
||||
}
|
||||
/* add ciphertext to the hash */
|
||||
GHASH(ctx, blockp, ghash);
|
||||
GHASH(ctx, blockp, ghash, gops);
|
||||
|
||||
/*
|
||||
* Increment counter.
|
||||
@@ -443,7 +375,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
|
||||
}
|
||||
out:
|
||||
ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
|
||||
GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
|
||||
GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
|
||||
encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
|
||||
(uint8_t *)ctx->gcm_J0);
|
||||
xor_block((uint8_t *)ctx->gcm_J0, ghash);
|
||||
@@ -495,12 +427,14 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
|
||||
void (*copy_block)(uint8_t *, uint8_t *),
|
||||
void (*xor_block)(uint8_t *, uint8_t *))
|
||||
{
|
||||
gcm_impl_ops_t *gops;
|
||||
uint8_t *cb;
|
||||
ulong_t remainder = iv_len;
|
||||
ulong_t processed = 0;
|
||||
uint8_t *datap, *ghash;
|
||||
uint64_t len_a_len_c[2];
|
||||
|
||||
gops = gcm_impl_get_ops();
|
||||
ghash = (uint8_t *)ctx->gcm_ghash;
|
||||
cb = (uint8_t *)ctx->gcm_cb;
|
||||
if (iv_len == 12) {
|
||||
@@ -524,12 +458,12 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
|
||||
processed += block_size;
|
||||
remainder -= block_size;
|
||||
}
|
||||
GHASH(ctx, datap, ghash);
|
||||
GHASH(ctx, datap, ghash, gops);
|
||||
} while (remainder > 0);
|
||||
|
||||
len_a_len_c[0] = 0;
|
||||
len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
|
||||
GHASH(ctx, len_a_len_c, ctx->gcm_J0);
|
||||
GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);
|
||||
|
||||
/* J0 will be used again in the final */
|
||||
copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
|
||||
@@ -547,6 +481,7 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
|
||||
void (*copy_block)(uint8_t *, uint8_t *),
|
||||
void (*xor_block)(uint8_t *, uint8_t *))
|
||||
{
|
||||
gcm_impl_ops_t *gops;
|
||||
uint8_t *ghash, *datap, *authp;
|
||||
size_t remainder, processed;
|
||||
|
||||
@@ -558,6 +493,7 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
|
||||
gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
|
||||
copy_block, xor_block);
|
||||
|
||||
gops = gcm_impl_get_ops();
|
||||
authp = (uint8_t *)ctx->gcm_tmp;
|
||||
ghash = (uint8_t *)ctx->gcm_ghash;
|
||||
bzero(authp, block_size);
|
||||
@@ -582,7 +518,7 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
|
||||
}
|
||||
|
||||
/* add auth data to the hash */
|
||||
GHASH(ctx, datap, ghash);
|
||||
GHASH(ctx, datap, ghash, gops);
|
||||
|
||||
} while (remainder > 0);
|
||||
|
||||
@@ -694,55 +630,206 @@ gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
|
||||
ctx->gcm_kmflag = kmflag;
|
||||
}
|
||||
|
||||
/* GCM implementation that contains the fastest methods */
|
||||
static gcm_impl_ops_t gcm_fastest_impl = {
|
||||
.name = "fastest"
|
||||
};
|
||||
|
||||
#ifdef __amd64
|
||||
/* All compiled in implementations */
|
||||
const gcm_impl_ops_t *gcm_all_impl[] = {
|
||||
&gcm_generic_impl,
|
||||
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
|
||||
&gcm_pclmulqdq_impl,
|
||||
#endif
|
||||
};
|
||||
|
||||
#define INTEL_PCLMULQDQ_FLAG (1 << 1)
|
||||
/* Indicate that benchmark has been completed */
|
||||
static boolean_t gcm_impl_initialized = B_FALSE;
|
||||
|
||||
/* Select gcm implementation */
|
||||
#define IMPL_FASTEST (UINT32_MAX)
|
||||
#define IMPL_CYCLE (UINT32_MAX-1)
|
||||
|
||||
#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
|
||||
|
||||
static uint32_t icp_gcm_impl = IMPL_FASTEST;
|
||||
static uint32_t user_sel_impl = IMPL_FASTEST;
|
||||
|
||||
/* Hold all supported implementations */
|
||||
static size_t gcm_supp_impl_cnt = 0;
|
||||
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
|
||||
|
||||
/*
|
||||
* Return 1 if executing on Intel with PCLMULQDQ instructions,
|
||||
* otherwise 0 (i.e., Intel without PCLMULQDQ or AMD64).
|
||||
* Cache the result, as the CPU can't change.
|
||||
*
|
||||
* Note: the userland version uses getisax(). The kernel version uses
|
||||
* is_x86_featureset().
|
||||
* Selects the gcm operation
|
||||
*/
|
||||
static int
|
||||
intel_pclmulqdq_instruction_present(void)
|
||||
gcm_impl_ops_t *
|
||||
gcm_impl_get_ops()
|
||||
{
|
||||
static int cached_result = -1;
|
||||
unsigned eax, ebx, ecx, edx;
|
||||
unsigned func, subfunc;
|
||||
gcm_impl_ops_t *ops = NULL;
|
||||
const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
|
||||
|
||||
if (cached_result == -1) { /* first time */
|
||||
/* check for an intel cpu */
|
||||
func = 0;
|
||||
subfunc = 0;
|
||||
switch (impl) {
|
||||
case IMPL_FASTEST:
|
||||
ASSERT(gcm_impl_initialized);
|
||||
ops = &gcm_fastest_impl;
|
||||
break;
|
||||
case IMPL_CYCLE:
|
||||
{
|
||||
ASSERT(gcm_impl_initialized);
|
||||
ASSERT3U(gcm_supp_impl_cnt, >, 0);
|
||||
/* Cycle through supported implementations */
|
||||
static size_t cycle_impl_idx = 0;
|
||||
size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
|
||||
ops = gcm_supp_impl[idx];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ASSERT3U(impl, <, gcm_supp_impl_cnt);
|
||||
ASSERT3U(gcm_supp_impl_cnt, >, 0);
|
||||
if (impl < ARRAY_SIZE(gcm_all_impl))
|
||||
ops = gcm_supp_impl[impl];
|
||||
break;
|
||||
}
|
||||
|
||||
__asm__ __volatile__(
|
||||
"cpuid"
|
||||
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
|
||||
: "a"(func), "c"(subfunc));
|
||||
ASSERT3P(ops, !=, NULL);
|
||||
|
||||
if (memcmp((char *)(&ebx), "Genu", 4) == 0 &&
|
||||
memcmp((char *)(&edx), "ineI", 4) == 0 &&
|
||||
memcmp((char *)(&ecx), "ntel", 4) == 0) {
|
||||
func = 1;
|
||||
subfunc = 0;
|
||||
return (ops);
|
||||
}
|
||||
|
||||
/* check for aes-ni instruction set */
|
||||
__asm__ __volatile__(
|
||||
"cpuid"
|
||||
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
|
||||
: "a"(func), "c"(subfunc));
|
||||
void
|
||||
gcm_impl_init(void)
|
||||
{
|
||||
gcm_impl_ops_t *curr_impl;
|
||||
int i, c;
|
||||
|
||||
cached_result = !!(ecx & INTEL_PCLMULQDQ_FLAG);
|
||||
} else {
|
||||
cached_result = 0;
|
||||
/* move supported impl into aes_supp_impls */
|
||||
for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
|
||||
curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
|
||||
|
||||
if (curr_impl->is_supported())
|
||||
gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
|
||||
}
|
||||
gcm_supp_impl_cnt = c;
|
||||
|
||||
/* set fastest implementation. assume hardware accelerated is fastest */
|
||||
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
|
||||
if (gcm_pclmulqdq_impl.is_supported())
|
||||
memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
|
||||
sizeof (gcm_fastest_impl));
|
||||
else
|
||||
#endif
|
||||
memcpy(&gcm_fastest_impl, &gcm_generic_impl,
|
||||
sizeof (gcm_fastest_impl));
|
||||
|
||||
strcpy(gcm_fastest_impl.name, "fastest");
|
||||
|
||||
/* Finish initialization */
|
||||
atomic_swap_32(&icp_gcm_impl, user_sel_impl);
|
||||
gcm_impl_initialized = B_TRUE;
|
||||
}
|
||||
|
||||
static const struct {
|
||||
char *name;
|
||||
uint32_t sel;
|
||||
} gcm_impl_opts[] = {
|
||||
{ "cycle", IMPL_CYCLE },
|
||||
{ "fastest", IMPL_FASTEST },
|
||||
};
|
||||
|
||||
/*
|
||||
* Function sets desired gcm implementation.
|
||||
*
|
||||
* If we are called before init(), user preference will be saved in
|
||||
* user_sel_impl, and applied in later init() call. This occurs when module
|
||||
* parameter is specified on module load. Otherwise, directly update
|
||||
* icp_aes_impl.
|
||||
*
|
||||
* @val Name of gcm implementation to use
|
||||
* @param Unused.
|
||||
*/
|
||||
int
|
||||
gcm_impl_set(const char *val)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
char req_name[GCM_IMPL_NAME_MAX];
|
||||
uint32_t impl = GCM_IMPL_READ(user_sel_impl);
|
||||
size_t i;
|
||||
|
||||
/* sanitize input */
|
||||
i = strnlen(val, GCM_IMPL_NAME_MAX);
|
||||
if (i == 0 || i >= GCM_IMPL_NAME_MAX)
|
||||
return (err);
|
||||
|
||||
strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
|
||||
while (i > 0 && isspace(req_name[i-1]))
|
||||
i--;
|
||||
req_name[i] = '\0';
|
||||
|
||||
/* Check mandatory options */
|
||||
for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
|
||||
if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
|
||||
impl = gcm_impl_opts[i].sel;
|
||||
err = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return (cached_result);
|
||||
/* check all supported impl if init() was already called */
|
||||
if (err != 0 && gcm_impl_initialized) {
|
||||
/* check all supported implementations */
|
||||
for (i = 0; i < gcm_supp_impl_cnt; i++) {
|
||||
if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
|
||||
impl = i;
|
||||
err = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (err == 0) {
|
||||
if (gcm_impl_initialized)
|
||||
atomic_swap_32(&icp_gcm_impl, impl);
|
||||
else
|
||||
atomic_swap_32(&user_sel_impl, impl);
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
#endif /* __amd64 */
|
||||
#if defined(_KERNEL)
|
||||
#include <linux/mod_compat.h>
|
||||
|
||||
static int
|
||||
icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
|
||||
{
|
||||
return (gcm_impl_set(val));
|
||||
}
|
||||
|
||||
static int
|
||||
icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
|
||||
{
|
||||
int i, cnt = 0;
|
||||
char *fmt;
|
||||
const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
|
||||
|
||||
ASSERT(gcm_impl_initialized);
|
||||
|
||||
/* list mandatory options */
|
||||
for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
|
||||
fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
|
||||
cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name);
|
||||
}
|
||||
|
||||
/* list all supported implementations */
|
||||
for (i = 0; i < gcm_supp_impl_cnt; i++) {
|
||||
fmt = (i == impl) ? "[%s] " : "%s ";
|
||||
cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name);
|
||||
}
|
||||
|
||||
return (cnt);
|
||||
}
|
||||
|
||||
module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
|
||||
NULL, 0644);
|
||||
MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <modes/gcm_impl.h>
|
||||
|
||||
/*
 * A 16-byte value held as two 64-bit words.  gcm_generic_mul() loads
 * element [0] of its operands into a and element [1] into b.
 */
struct aes_block {
	uint64_t a;	/* first 64-bit word */
	uint64_t b;	/* second 64-bit word */
};
|
||||
|
||||
/*
|
||||
* Perform a carry-less multiplication (that is, use XOR instead of the
|
||||
* multiply operator) on *x_in and *y and place the result in *res.
|
||||
*
|
||||
* Byte swap the input (*x_in and *y) and the output (*res).
|
||||
*
|
||||
* Note: x_in, y, and res all point to 16-byte numbers (an array of two
|
||||
* 64-bit integers).
|
||||
*/
|
||||
static void
|
||||
gcm_generic_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
|
||||
{
|
||||
static const uint64_t R = 0xe100000000000000ULL;
|
||||
struct aes_block z = {0, 0};
|
||||
struct aes_block v;
|
||||
uint64_t x;
|
||||
int i, j;
|
||||
|
||||
v.a = ntohll(y[0]);
|
||||
v.b = ntohll(y[1]);
|
||||
|
||||
for (j = 0; j < 2; j++) {
|
||||
x = ntohll(x_in[j]);
|
||||
for (i = 0; i < 64; i++, x <<= 1) {
|
||||
if (x & 0x8000000000000000ULL) {
|
||||
z.a ^= v.a;
|
||||
z.b ^= v.b;
|
||||
}
|
||||
if (v.b & 1ULL) {
|
||||
v.b = (v.a << 63)|(v.b >> 1);
|
||||
v.a = (v.a >> 1) ^ R;
|
||||
} else {
|
||||
v.b = (v.a << 63)|(v.b >> 1);
|
||||
v.a = v.a >> 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
res[0] = htonll(z.a);
|
||||
res[1] = htonll(z.b);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
gcm_generic_will_work(void)
|
||||
{
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
const gcm_impl_ops_t gcm_generic_impl = {
|
||||
.mul = &gcm_generic_mul,
|
||||
.is_supported = &gcm_generic_will_work,
|
||||
.name = "generic"
|
||||
};
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
|
||||
|
||||
#include <linux/simd_x86.h>
|
||||
|
||||
/* These functions are used to execute pclmulqdq based assembly methods */
|
||||
extern void gcm_mul_pclmulqdq(uint64_t *, uint64_t *, uint64_t *);
|
||||
|
||||
|
||||
#include <modes/gcm_impl.h>
|
||||
|
||||
/*
 * Carry-less multiplication of *x_in and *y, with the result placed in
 * *res, delegated to the gcm_mul_pclmulqdq() assembly routine.
 *
 * Note: x_in, y, and res all point to 16-byte numbers (an array of two
 * 64-bit integers).
 *
 * The assembly routine runs between kfpu_begin() and kfpu_end().
 */
static void
gcm_pclmulqdq_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
{
	kfpu_begin();
	gcm_mul_pclmulqdq(x_in, y, res);
	kfpu_end();
}
|
||||
|
||||
static boolean_t
|
||||
gcm_pclmulqdq_will_work(void)
|
||||
{
|
||||
return (zfs_pclmulqdq_available());
|
||||
}
|
||||
|
||||
const gcm_impl_ops_t gcm_pclmulqdq_impl = {
|
||||
.mul = &gcm_pclmulqdq_mul,
|
||||
.is_supported = &gcm_pclmulqdq_will_work,
|
||||
.name = "pclmulqdq"
|
||||
};
|
||||
|
||||
#endif /* defined(__x86_64) && defined(HAVE_PCLMULQDQ) */
|
||||
Reference in New Issue
Block a user