mirror_zfs/module/icp/algs/sha2/sha2.c
Tom Caputi 0b04990a5d Illumos Crypto Port module added to enable native encryption in zfs
A port of the Illumos Crypto Framework to a Linux kernel module (found
in module/icp). This is needed to do the actual encryption work. We cannot
use the Linux kernel's built-in crypto API because it is only exported to
GPL-licensed modules. Having the ICP also means the crypto code can run on
any of the other kernels under OpenZFS. I ended up porting over most of the
internals of the framework, which means that porting over other API calls (if
we need them) should be fairly easy. Specifically, I have ported over the API
functions related to encryption, digests, macs, and crypto templates. The ICP
is able to use assembly-accelerated encryption on amd64 machines and AES-NI
instructions on Intel chips that support it. There are place-holder
directories for similar assembly optimizations for other architectures
(although they have not been written).

Signed-off-by: Tom Caputi <tcaputi@datto.com>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #4329
2016-07-20 10:43:30 -07:00

496 lines
16 KiB
C

/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright 2013 Saso Kiselkov. All rights reserved.
*/
/*
* The basic framework for this code came from the reference
* implementation for MD5. That implementation is Copyright (C)
* 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
*
* License to copy and use this software is granted provided that it
* is identified as the "RSA Data Security, Inc. MD5 Message-Digest
* Algorithm" in all material mentioning or referencing this software
* or this function.
*
* License is also granted to make and use derivative works provided
* that such works are identified as "derived from the RSA Data
* Security, Inc. MD5 Message-Digest Algorithm" in all material
* mentioning or referencing the derived work.
*
* RSA Data Security, Inc. makes no representations concerning either
* the merchantability of this software or the suitability of this
* software for any particular purpose. It is provided "as is"
* without express or implied warranty of any kind.
*
* These notices must be retained in any copies of any part of this
* documentation and/or software.
*
* NOTE: Cleaned-up and optimized, version of SHA2, based on the FIPS 180-2
* standard, available at
* http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
* Not as fast as one would like -- further optimizations are encouraged
* and appreciated.
*/
#include <sys/zfs_context.h>
#define _SHA2_IMPL
#include <sha2/sha2.h>
#include <sha2/sha2_consts.h>
#define _RESTRICT_KYWD
#ifdef _LITTLE_ENDIAN
#include <sys/byteorder.h>
#define HAVE_HTONL
#endif
/* Forward declaration; Encode() is defined further down in this file. */
static void Encode(uint8_t *, uint32_t *, size_t);
/*
 * On amd64 the single-block transform is a macro that forwards to
 * SHA256TransformBlocks() with a block count of 1. That function is only
 * declared here; its definition is not in this file (presumably the
 * assembly implementation -- confirm against the build). Every other
 * platform uses the static C SHA256Transform() defined below.
 */
#if defined(__amd64)
#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
#else
static void SHA256Transform(SHA2_CTX *, const uint8_t *);
#endif /* __amd64 */
/*
 * Message padding: a single 0x80 byte followed by zeros. SHA2Final() feeds
 * a prefix of this array to SHA2Update() to pad out the final block.
 */
static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
/*
 * Ch and Maj are the basic SHA2 functions.
 *
 * Every use of a macro argument is parenthesized (including the operand of
 * the complement in Ch) so that compound expressions can be passed safely.
 * Note that the arguments are still evaluated more than once.
 */
#define	Ch(b, c, d)	(((b) & (c)) ^ ((~(b)) & (d)))
#define	Maj(b, c, d)	(((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))

/* Rotates x right n bits (0 < n < width of x). */
#define	ROTR(x, n)	\
	(((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))

/* Shift x right n bits */
#define	SHR(x, n)	((x) >> (n))

/* SHA256 Functions */
#define	BIGSIGMA0_256(x)	(ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
#define	BIGSIGMA1_256(x)	(ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
#define	SIGMA0_256(x)		(ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
#define	SIGMA1_256(x)		(ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))

/*
 * One SHA-256 round.
 *
 * a..h are the working variables in their roles for this round; d and h are
 * updated in place.  i selects the round constant via SHA256_CONST() and w
 * is the message-schedule word for the round.  The macro uses the scratch
 * variables T1 and T2, which must be declared in the expanding scope.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon is exactly one statement, even under an unbraced if/else.
 */
#define	SHA256ROUND(a, b, c, d, e, f, g, h, i, w)			\
	do {								\
		T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) +		\
		    SHA256_CONST(i) + (w);				\
		d += T1;						\
		T2 = BIGSIGMA0_256(a) + Maj(a, b, c);			\
		h = T1 + T2;						\
	} while (0)
/*
 * Big-endian loads.
 *
 * sparc optimization:
 *
 * on the sparc, we can load big endian 32-bit data easily.  note that
 * special care must be taken to ensure the address is 32-bit aligned.
 * in the interest of speed, we don't check to make sure, since
 * careful programming can guarantee this for us.
 *
 * The first two variants dereference addr as a word pointer and therefore
 * carry that alignment requirement.  The last variant assembles the value
 * one byte at a time and works for any alignment; addr must then be a
 * pointer to bytes.
 */
#if defined(_BIG_ENDIAN)
#define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))
#define	LOAD_BIG_64(addr)	(*(uint64_t *)(addr))

#elif defined(HAVE_HTONL)
#define	LOAD_BIG_32(addr)	htonl(*((uint32_t *)(addr)))
#define	LOAD_BIG_64(addr)	htonll(*((uint64_t *)(addr)))

#else
/*
 * little endian -- will work on big endian, but slowly
 *
 * Each byte is widened to the result type before it is shifted.  Without
 * the cast the byte is promoted to a signed int, and shifting a value
 * >= 0x80 left by 24 overflows that int (undefined behaviour, and the
 * result sign-extends when it is later widened).
 */
#define	LOAD_BIG_32(addr)	\
	(((uint32_t)(addr)[0] << 24) | ((uint32_t)(addr)[1] << 16) | \
	((uint32_t)(addr)[2] << 8) | (uint32_t)(addr)[3])
#define	LOAD_BIG_64(addr)	\
	(((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) | \
	((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \
	((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \
	((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
#endif	/* _BIG_ENDIAN */
#if !defined(__amd64)
/*
 * SHA256Transform()
 *
 * purpose: run the SHA-256 compression function over one 64-byte message
 *          block and add the result into the running hash state.
 *   input: SHA2_CTX *		: the context whose state.s32[] is updated
 *          const uint8_t *	: the 64-byte block to digest
 *  output: void
 *
 * The 64 rounds are fully unrolled.  Rather than shuffling the eight
 * working variables after each round, every SHA256ROUND() invocation is
 * handed them rotated by one position, so the roles line up again every
 * eight rounds.  The message schedule lives in a rolling window of sixteen
 * words (w0..w15) that is overwritten in place from round 16 onwards.
 *
 * If blk is not 4-byte aligned it is first copied into ctx->buf_un.buf32,
 * which overwrites whatever was held in that buffer.
 */
static void
SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
{
	uint32_t a = ctx->state.s32[0];
	uint32_t b = ctx->state.s32[1];
	uint32_t c = ctx->state.s32[2];
	uint32_t d = ctx->state.s32[3];
	uint32_t e = ctx->state.s32[4];
	uint32_t f = ctx->state.s32[5];
	uint32_t g = ctx->state.s32[6];
	uint32_t h = ctx->state.s32[7];

	uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
	uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
	uint32_t T1, T2;

	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
		/*
		 * Copy exactly one block: the sixteen 32-bit words loaded
		 * below.  Sizing the copy by the whole bounce buffer would
		 * read past the end of the caller's block whenever that
		 * buffer is declared larger than 64 bytes.
		 */
		bcopy(blk, ctx->buf_un.buf32, 16 * sizeof (uint32_t));
		blk = (uint8_t *)ctx->buf_un.buf32;
	}

	/* rounds 0-15: the schedule words come straight from the block */

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w0 = LOAD_BIG_32(blk + 4 * 0);
	SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w1 = LOAD_BIG_32(blk + 4 * 1);
	SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w2 = LOAD_BIG_32(blk + 4 * 2);
	SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w3 = LOAD_BIG_32(blk + 4 * 3);
	SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w4 = LOAD_BIG_32(blk + 4 * 4);
	SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w5 = LOAD_BIG_32(blk + 4 * 5);
	SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w6 = LOAD_BIG_32(blk + 4 * 6);
	SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w7 = LOAD_BIG_32(blk + 4 * 7);
	SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w8 = LOAD_BIG_32(blk + 4 * 8);
	SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w9 = LOAD_BIG_32(blk + 4 * 9);
	SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w10 = LOAD_BIG_32(blk + 4 * 10);
	SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w11 = LOAD_BIG_32(blk + 4 * 11);
	SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w12 = LOAD_BIG_32(blk + 4 * 12);
	SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w13 = LOAD_BIG_32(blk + 4 * 13);
	SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w14 = LOAD_BIG_32(blk + 4 * 14);
	SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w15 = LOAD_BIG_32(blk + 4 * 15);
	SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);

	/* rounds 16-31: extend the schedule in place */

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);

	/* rounds 32-47 */

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);

	/* rounds 48-63 */

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);

	/* fold this block's result into the running state */
	ctx->state.s32[0] += a;
	ctx->state.s32[1] += b;
	ctx->state.s32[2] += c;
	ctx->state.s32[3] += d;
	ctx->state.s32[4] += e;
	ctx->state.s32[5] += f;
	ctx->state.s32[6] += g;
	ctx->state.s32[7] += h;
}
#endif /* !__amd64 */
/*
 * Encode()
 *
 * purpose: serialize an array of 32-bit words as big-endian bytes.  The
 *          words are taken apart with shifts, so the result is the same
 *          regardless of the byte order of the host.
 *   input: uint8_t *	: place to store the big endian bytes
 *          uint32_t *	: the words to convert
 *          size_t	: the number of bytes to produce; the loop always
 *			  emits whole 4-byte groups, so callers pass a
 *			  multiple of 4
 *  output: void
 */
static void
Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
    size_t len)
{
	uint8_t		*dst = output;
	const uint32_t	*src = input;
	size_t		done;

	for (done = 0; done < len; done += 4) {
		uint32_t word = *src++;

		/* most significant byte first */
		*dst++ = (uint8_t)(word >> 24);
		*dst++ = (uint8_t)(word >> 16);
		*dst++ = (uint8_t)(word >> 8);
		*dst++ = (uint8_t)word;
	}
}
/*
 * SHA2Init()
 *
 * purpose: prepare a context for a new digest computation.
 *   input: uint64_t	: the mechanism to use; only the three SHA256
 *			  mechanism types are accepted, anything else is
 *			  reported through cmn_err(CE_PANIC, ...)
 *          SHA2_CTX *	: the context to initialize
 *  output: void
 */
void
SHA2Init(uint64_t mech, SHA2_CTX *ctx)
{
	/* initial hash value loaded for the SHA256 mechanisms */
	static const uint32_t sha256_iv[8] = {
		0x6a09e667U, 0xbb67ae85U, 0x3c6ef372U, 0xa54ff53aU,
		0x510e527fU, 0x9b05688cU, 0x1f83d9abU, 0x5be0cd19U
	};
	int word;

	if (mech == SHA256_MECH_INFO_TYPE ||
	    mech == SHA256_HMAC_MECH_INFO_TYPE ||
	    mech == SHA256_HMAC_GEN_MECH_INFO_TYPE) {
		for (word = 0; word < 8; word++)
			ctx->state.s32[word] = sha256_iv[word];
	} else {
		cmn_err(CE_PANIC,
		    "sha2_init: failed to find a supported algorithm: 0x%x",
		    (uint32_t)mech);
	}

	/* remember the mechanism and start with an empty bit count */
	ctx->algotype = (uint32_t)mech;
	ctx->count.c64[1] = 0;
	ctx->count.c64[0] = 0;
}
/*
 * SHA256Init()
 *
 * purpose: convenience wrapper that initializes a context by calling
 * SHA2Init() with the SHA256 mechanism selector.
 * input: SHA256_CTX * : the context to initialize
 * output: void
 */
void
SHA256Init(SHA256_CTX *ctx)
{
SHA2Init(SHA256, ctx);
}
/*
 * SHA2Update()
 *
 * purpose: continues an sha2 digest operation, using the message block
 *          to update the context.
 *   input: SHA2_CTX *	: the context to update
 *          void *	: the message block
 *          size_t	: the length of the message block, in bytes
 *  output: void
 *
 * Input that does not fill a whole block is kept in ctx->buf_un.buf8 until
 * a later call completes the block.
 *
 * All byte offsets are held in size_t so that a single call may be handed
 * more than 4 GiB of data, and the bit-count carry is computed on values of
 * the counter's own width so that it is taken only on a real wrap-around.
 */
void
SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
{
	size_t		i, buf_index, buf_len, buf_limit;
	const uint8_t	*input = inptr;
	uint32_t	algotype = ctx->algotype;

	/* check for noop */
	if (input_len == 0)
		return;

	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
		/*
		 * The 64-bit bit count is kept as two 32-bit words,
		 * c32[0] (high) and c32[1] (low).  Split the number of bits
		 * being added the same way; comparing the low word against
		 * an untruncated 64-bit value would report a carry for every
		 * input of 512 MiB or more.
		 */
		uint32_t bits_lo = (uint32_t)(input_len << 3);
		uint32_t bits_hi = (uint32_t)((uint64_t)input_len >> 29);

		buf_limit = 64;

		/* compute number of bytes mod 64 */
		buf_index = (ctx->count.c32[1] >> 3) & 0x3F;

		/* update number of bits */
		ctx->count.c32[1] += bits_lo;
		if (ctx->count.c32[1] < bits_lo)
			ctx->count.c32[0]++;
		ctx->count.c32[0] += bits_hi;
	} else {
		/*
		 * 128-bit bit count in c64[0] (high) and c64[1] (low): the
		 * bits of input_len that do not fit in the low word after
		 * multiplying by 8 are the ones above bit 60.
		 *
		 * NOTE(review): this file provides no block transform for
		 * this case, so the branch only maintains the counter.
		 */
		uint64_t bits_lo = (uint64_t)input_len << 3;
		uint64_t bits_hi = (uint64_t)input_len >> 61;

		buf_limit = 128;

		/* compute number of bytes mod 128 */
		buf_index = (ctx->count.c64[1] >> 3) & 0x7F;

		/* update number of bits */
		ctx->count.c64[1] += bits_lo;
		if (ctx->count.c64[1] < bits_lo)
			ctx->count.c64[0]++;
		ctx->count.c64[0] += bits_hi;
	}

	/* room left in the partially filled block buffer */
	buf_len = buf_limit - buf_index;

	/* transform as many times as possible */
	i = 0;
	if (input_len >= buf_len) {

		/*
		 * general optimization:
		 *
		 * only do initial bcopy() and SHA2Transform() if
		 * buf_index != 0.  if buf_index == 0, we're just
		 * wasting our time doing the bcopy() since there
		 * wasn't any data left over from a previous call to
		 * SHA2Update().
		 */
		if (buf_index) {
			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
			if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
				SHA256Transform(ctx, ctx->buf_un.buf8);
			i = buf_len;
		}

#if !defined(__amd64)
		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
			/* i never exceeds input_len, so this cannot wrap */
			for (; input_len - i >= buf_limit; i += buf_limit) {
				SHA256Transform(ctx, &input[i]);
			}
		}
#else
		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
			/* hand every remaining whole 64-byte block over */
			size_t block_count = (input_len - i) >> 6;

			if (block_count > 0) {
				SHA256TransformBlocks(ctx, &input[i],
				    block_count);
				i += block_count << 6;
			}
		}
#endif	/* !__amd64 */

		/*
		 * general optimization:
		 *
		 * if i and input_len are the same, return now instead
		 * of calling bcopy(), since the bcopy() in this case
		 * will be an expensive noop.
		 */
		if (input_len == i)
			return;

		buf_index = 0;
	}

	/* buffer remaining input */
	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
}
/*
 * SHA2Final()
 *
 * purpose: ends an sha2 digest operation: pads the message, appends its
 *          length in bits, writes out the digest and zeroes the context.
 *   input: void *	: a buffer to store the digest (void * rather than
 *			  uchar_t * because many callers pass other
 *			  pointer types here)
 *          SHA2_CTX *	: the context to finalize, save, and zero
 *  output: void
 *
 * The digest is only written for the SHA256 mechanism types; the context
 * is zeroed in every case.
 */
void
SHA2Final(void *digest, SHA2_CTX *ctx)
{
	uint8_t		length_be[sizeof (ctx->count.c32)];
	uint32_t	used, pad_len;

	if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
		/* bytes already sitting in the current 64-byte block */
		used = (ctx->count.c32[1] >> 3) & 0x3f;

		/*
		 * Pad up to byte 56 of a block, spilling into the next
		 * block when fewer than 8 bytes remain for the length.
		 */
		pad_len = (used < 56) ? (56 - used) : (120 - used);

		/*
		 * Capture the bit count first: the padding passed through
		 * SHA2Update() below advances the counter.
		 */
		Encode(length_be, ctx->count.c32, sizeof (length_be));
		SHA2Update(ctx, PADDING, pad_len);
		SHA2Update(ctx, length_be, sizeof (length_be));
		Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
	}

	/* zeroize sensitive information */
	bzero(ctx, sizeof (*ctx));
}