Linux 5.0 compat: SIMD compatibility

Restore the SIMD optimization for 4.19.38 LTS, 4.14.120 LTS,
and 5.0 and newer kernels.

This commit squashes the following commits from master in to
a single commit which can be applied to 0.8.2.

10fa2545 - Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore
b88ca2ac - Enable SIMD for encryption
095b5412 - Fix CONFIG_X86_DEBUG_FPU build failure
e5db3134 - Linux 5.0 compat: SIMD compatibility

Reviewed-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
TEST_ZIMPORT_SKIP="yes"
This commit is contained in:
Brian Behlendorf
2019-07-12 09:31:20 -07:00
committed by Tony Hutter
parent 988b040476
commit 62c034f6d4
30 changed files with 548 additions and 206 deletions
+19 -12
View File
@@ -27,6 +27,7 @@
#include <sys/crypto/spi.h>
#include <modes/modes.h>
#include <aes/aes_impl.h>
#include <linux/simd.h>
/*
* Initialize AES encryption and decryption key schedules.
@@ -40,9 +41,9 @@
void
aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched)
{
aes_impl_ops_t *ops = aes_impl_get_ops();
aes_key_t *newbie = keysched;
uint_t keysize, i, j;
const aes_impl_ops_t *ops = aes_impl_get_ops();
aes_key_t *newbie = keysched;
uint_t keysize, i, j;
union {
uint64_t ka64[4];
uint32_t ka32[8];
@@ -252,12 +253,17 @@ static size_t aes_supp_impl_cnt = 0;
static aes_impl_ops_t *aes_supp_impl[ARRAY_SIZE(aes_all_impl)];
/*
* Selects the aes operations for encrypt/decrypt/key setup
* Returns the AES operations for encrypt/decrypt/key setup. When a
* SIMD implementation is not allowed in the current context, then
* fallback to the fastest generic implementation.
*/
aes_impl_ops_t *
aes_impl_get_ops()
const aes_impl_ops_t *
aes_impl_get_ops(void)
{
aes_impl_ops_t *ops = NULL;
if (!kfpu_allowed())
return (&aes_generic_impl);
const aes_impl_ops_t *ops = NULL;
const uint32_t impl = AES_IMPL_READ(icp_aes_impl);
switch (impl) {
@@ -266,15 +272,13 @@ aes_impl_get_ops()
ops = &aes_fastest_impl;
break;
case IMPL_CYCLE:
{
/* Cycle through supported implementations */
ASSERT(aes_impl_initialized);
ASSERT3U(aes_supp_impl_cnt, >, 0);
/* Cycle through supported implementations */
static size_t cycle_impl_idx = 0;
size_t idx = (++cycle_impl_idx) % aes_supp_impl_cnt;
ops = aes_supp_impl[idx];
}
break;
break;
default:
ASSERT3U(impl, <, aes_supp_impl_cnt);
ASSERT3U(aes_supp_impl_cnt, >, 0);
@@ -288,13 +292,16 @@ aes_impl_get_ops()
return (ops);
}
/*
* Initialize all supported implementations.
*/
void
aes_impl_init(void)
{
aes_impl_ops_t *curr_impl;
int i, c;
/* move supported impl into aes_supp_impls */
/* Move supported implementations into aes_supp_impls */
for (i = 0, c = 0; i < ARRAY_SIZE(aes_all_impl); i++) {
curr_impl = (aes_impl_ops_t *)aes_all_impl[i];
+1 -1
View File
@@ -108,7 +108,7 @@ aes_aesni_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4],
static boolean_t
aes_aesni_will_work(void)
{
return (zfs_aes_available());
return (kfpu_allowed() && zfs_aes_available());
}
const aes_impl_ops_t aes_aesni_impl = {
+24 -14
View File
@@ -29,6 +29,7 @@
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>
#include <modes/gcm_impl.h>
#include <linux/simd.h>
#define GHASH(c, d, t, o) \
xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
@@ -46,7 +47,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
gcm_impl_ops_t *gops;
const gcm_impl_ops_t *gops;
size_t remainder = length;
size_t need = 0;
uint8_t *datap = (uint8_t *)data;
@@ -168,7 +169,7 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
gcm_impl_ops_t *gops;
const gcm_impl_ops_t *gops;
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
uint8_t *ghash, *macp = NULL;
int i, rv;
@@ -320,7 +321,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
gcm_impl_ops_t *gops;
const gcm_impl_ops_t *gops;
size_t pt_len;
size_t remainder;
uint8_t *ghash;
@@ -427,7 +428,7 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
gcm_impl_ops_t *gops;
const gcm_impl_ops_t *gops;
uint8_t *cb;
ulong_t remainder = iv_len;
ulong_t processed = 0;
@@ -481,7 +482,7 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
gcm_impl_ops_t *gops;
const gcm_impl_ops_t *gops;
uint8_t *ghash, *datap, *authp;
size_t remainder, processed;
@@ -660,12 +661,17 @@ static size_t gcm_supp_impl_cnt = 0;
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
/*
* Selects the gcm operation
* Returns the GCM operations for encrypt/decrypt/key setup. When a
* SIMD implementation is not allowed in the current context, then
* fallback to the fastest generic implementation.
*/
gcm_impl_ops_t *
const gcm_impl_ops_t *
gcm_impl_get_ops()
{
gcm_impl_ops_t *ops = NULL;
if (!kfpu_allowed())
return (&gcm_generic_impl);
const gcm_impl_ops_t *ops = NULL;
const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
switch (impl) {
@@ -674,15 +680,13 @@ gcm_impl_get_ops()
ops = &gcm_fastest_impl;
break;
case IMPL_CYCLE:
{
/* Cycle through supported implementations */
ASSERT(gcm_impl_initialized);
ASSERT3U(gcm_supp_impl_cnt, >, 0);
/* Cycle through supported implementations */
static size_t cycle_impl_idx = 0;
size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
ops = gcm_supp_impl[idx];
}
break;
break;
default:
ASSERT3U(impl, <, gcm_supp_impl_cnt);
ASSERT3U(gcm_supp_impl_cnt, >, 0);
@@ -696,13 +700,16 @@ gcm_impl_get_ops()
return (ops);
}
/*
* Initialize all supported implementations.
*/
void
gcm_impl_init(void)
{
gcm_impl_ops_t *curr_impl;
int i, c;
/* move supported impl into aes_supp_impls */
/* Move supported implementations into gcm_supp_impls */
for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
@@ -711,7 +718,10 @@ gcm_impl_init(void)
}
gcm_supp_impl_cnt = c;
/* set fastest implementation. assume hardware accelerated is fastest */
/*
* Set the fastest implementation given the assumption that the
* hardware accelerated version is the fastest.
*/
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
if (gcm_pclmulqdq_impl.is_supported()) {
memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
+1 -1
View File
@@ -52,7 +52,7 @@ gcm_pclmulqdq_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
static boolean_t
gcm_pclmulqdq_will_work(void)
{
return (zfs_pclmulqdq_available());
return (kfpu_allowed() && zfs_pclmulqdq_available());
}
const gcm_impl_ops_t gcm_pclmulqdq_impl = {