mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
OpenZFS 4185 - add new cryptographic checksums to ZFS: SHA-512, Skein, Edon-R
Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Prakash Surya <prakash.surya@delphix.com> Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com> Reviewed by: Richard Lowe <richlowe@richlowe.net> Approved by: Garrett D'Amore <garrett@damore.org> Ported by: Tony Hutter <hutter2@llnl.gov> OpenZFS-issue: https://www.illumos.org/issues/4185 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/45818ee Porting Notes: This code is ported on top of the Illumos Crypto Framework code: https://github.com/zfsonlinux/zfs/pull/4329/commits/b5e030c8dbb9cd393d313571dee4756fbba8c22d The list of porting changes includes: - Copied module/icp/include/sha2/sha2.h directly from illumos - Removed from module/icp/algs/sha2/sha2.c: #pragma inline(SHA256Init, SHA384Init, SHA512Init) - Added 'ctx' to lib/libzfs/libzfs_sendrecv.c:zio_checksum_SHA256() since it now takes in an extra parameter. - Added CTASSERT() to assert.h for module/zfs/edonr_zfs.c - Added skein & edonr to libicp/Makefile.am - Added sha512.S. It was generated from sha512-x86_64.pl in Illumos. - Updated ztest.c with new fletcher_4_*() args; used NULL for new CTX argument. - In icp/algs/edonr/edonr_byteorder.h, Removed the #if defined(__linux) section to not #include the non-existent endian.h. - In skein_test.c, rename NULL to 0 in "no test vector" array entries to get around a compiler warning. - Fixup test files: - Rename <sys/varargs.h> -> <varargs.h>, <strings.h> -> <string.h>, - Remove <note.h> and define NOTE() as NOP. - Define u_longlong_t - Rename "#!/usr/bin/ksh" -> "#!/bin/ksh -p" - Rename NULL to 0 in "no test vector" array entries to get around a compiler warning. - Remove "for isa in $($ISAINFO); do" stuff - Add/update Makefiles - Add some userspace headers like stdio.h/stdlib.h in place of sys/types.h. - EXPORT_SYMBOL *_Init/*_Update/*_Final... routines in ICP modules. 
- Update scripts/zfs2zol-patch.sed - include <sys/sha2.h> in sha2_impl.h - Add sha2.h to include/sys/Makefile.am - Add skein and edonr dirs to icp Makefile - Add new checksums to zpool_get.cfg - Move checksum switch block from zfs_secpolicy_setprop() to zfs_check_settable() - Fix -Wuninitialized error in edonr_byteorder.h on PPC - Fix stack frame size errors on ARM32 - Don't unroll loops in Skein on 32-bit to save stack space - Add memory barriers in sha2.c on 32-bit to save stack space - Add filetest_001_pos.ksh checksum sanity test - Add option to write pseudorandom data in file_write utility
This commit is contained in:
@@ -36,6 +36,7 @@ $(MODULE)-objs += dsl_pool.o
|
||||
$(MODULE)-objs += dsl_prop.o
|
||||
$(MODULE)-objs += dsl_scan.o
|
||||
$(MODULE)-objs += dsl_synctask.o
|
||||
$(MODULE)-objs += edonr_zfs.o
|
||||
$(MODULE)-objs += fm.o
|
||||
$(MODULE)-objs += gzip.o
|
||||
$(MODULE)-objs += lzjb.o
|
||||
@@ -49,6 +50,7 @@ $(MODULE)-objs += refcount.o
|
||||
$(MODULE)-objs += rrwlock.o
|
||||
$(MODULE)-objs += sa.o
|
||||
$(MODULE)-objs += sha256.o
|
||||
$(MODULE)-objs += skein_zfs.o
|
||||
$(MODULE)-objs += spa.o
|
||||
$(MODULE)-objs += spa_boot.o
|
||||
$(MODULE)-objs += spa_config.o
|
||||
|
||||
+2
-2
@@ -1382,7 +1382,7 @@ arc_cksum_verify(arc_buf_t *buf)
|
||||
return;
|
||||
}
|
||||
|
||||
fletcher_2_native(buf->b_data, arc_buf_size(buf), &zc);
|
||||
fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL, &zc);
|
||||
if (!ZIO_CHECKSUM_EQUAL(*hdr->b_l1hdr.b_freeze_cksum, zc))
|
||||
panic("buffer modified while frozen!");
|
||||
mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
|
||||
@@ -1495,7 +1495,7 @@ arc_cksum_compute(arc_buf_t *buf)
|
||||
ASSERT(!ARC_BUF_COMPRESSED(buf));
|
||||
hdr->b_l1hdr.b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t),
|
||||
KM_SLEEP);
|
||||
fletcher_2_native(buf->b_data, arc_buf_size(buf),
|
||||
fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL,
|
||||
hdr->b_l1hdr.b_freeze_cksum);
|
||||
mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
|
||||
arc_buf_watch(buf);
|
||||
|
||||
+2
-1
@@ -3814,7 +3814,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
||||
dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite);
|
||||
mutex_exit(&db->db_mtx);
|
||||
} else if (db->db_state == DB_NOFILL) {
|
||||
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF);
|
||||
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
|
||||
zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
|
||||
dr->dr_zio = zio_write(zio, os->os_spa, txg,
|
||||
&dr->dr_bp_copy, NULL, db->db.db_size, db->db.db_size, &zp,
|
||||
dbuf_write_nofill_ready, NULL, NULL,
|
||||
|
||||
+3
-2
@@ -21,7 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@@ -62,7 +62,8 @@ ddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
|
||||
spa_t *spa = ddt->ddt_spa;
|
||||
objset_t *os = ddt->ddt_os;
|
||||
uint64_t *objectp = &ddt->ddt_object[type][class];
|
||||
boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_dedup;
|
||||
boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_flags &
|
||||
ZCHECKSUM_FLAG_DEDUP;
|
||||
char name[DDT_NAMELEN];
|
||||
|
||||
ddt_object_name(ddt, type, class, name);
|
||||
|
||||
+15
-9
@@ -1445,7 +1445,8 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
|
||||
|
||||
ASSERT(BP_EQUAL(bp, bp_orig));
|
||||
ASSERT(zio->io_prop.zp_compress != ZIO_COMPRESS_OFF);
|
||||
ASSERT(zio_checksum_table[chksum].ci_dedup);
|
||||
ASSERT(zio_checksum_table[chksum].ci_flags &
|
||||
ZCHECKSUM_FLAG_NOPWRITE);
|
||||
}
|
||||
dr->dt.dl.dr_overridden_by = *zio->io_bp;
|
||||
dr->dt.dl.dr_override_state = DR_OVERRIDDEN;
|
||||
@@ -1792,8 +1793,10 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
|
||||
* as well. Otherwise, the metadata checksum defaults
|
||||
* to fletcher4.
|
||||
*/
|
||||
if (zio_checksum_table[checksum].ci_correctable < 1 ||
|
||||
zio_checksum_table[checksum].ci_eck)
|
||||
if (!(zio_checksum_table[checksum].ci_flags &
|
||||
ZCHECKSUM_FLAG_METADATA) ||
|
||||
(zio_checksum_table[checksum].ci_flags &
|
||||
ZCHECKSUM_FLAG_EMBEDDED))
|
||||
checksum = ZIO_CHECKSUM_FLETCHER_4;
|
||||
|
||||
if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
|
||||
@@ -1832,17 +1835,20 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
|
||||
*/
|
||||
if (dedup_checksum != ZIO_CHECKSUM_OFF) {
|
||||
dedup = (wp & WP_DMU_SYNC) ? B_FALSE : B_TRUE;
|
||||
if (!zio_checksum_table[checksum].ci_dedup)
|
||||
if (!(zio_checksum_table[checksum].ci_flags &
|
||||
ZCHECKSUM_FLAG_DEDUP))
|
||||
dedup_verify = B_TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Enable nopwrite if we have a cryptographically secure
|
||||
* checksum that has no known collisions (i.e. SHA-256)
|
||||
* and compression is enabled. We don't enable nopwrite if
|
||||
* dedup is enabled as the two features are mutually exclusive.
|
||||
* Enable nopwrite if we have secure enough checksum
|
||||
* algorithm (see comment in zio_nop_write) and
|
||||
* compression is enabled. We don't enable nopwrite if
|
||||
* dedup is enabled as the two features are mutually
|
||||
* exclusive.
|
||||
*/
|
||||
nopwrite = (!dedup && zio_checksum_table[checksum].ci_dedup &&
|
||||
nopwrite = (!dedup && (zio_checksum_table[checksum].ci_flags &
|
||||
ZCHECKSUM_FLAG_NOPWRITE) &&
|
||||
compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled);
|
||||
}
|
||||
|
||||
|
||||
@@ -346,7 +346,8 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
|
||||
drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
|
||||
} else {
|
||||
drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
|
||||
if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
|
||||
if (zio_checksum_table[drrw->drr_checksumtype].ci_flags &
|
||||
ZCHECKSUM_FLAG_DEDUP)
|
||||
drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
|
||||
DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
|
||||
DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
|
||||
|
||||
@@ -56,6 +56,7 @@
|
||||
#include <sys/dmu_send.h>
|
||||
#include <sys/zio_compress.h>
|
||||
#include <zfs_fletcher.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
|
||||
/*
|
||||
* The SPA supports block sizes up to 16MB. However, very large blocks
|
||||
@@ -108,6 +109,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
{
|
||||
int used, compressed, uncompressed;
|
||||
int64_t delta;
|
||||
spa_feature_t f;
|
||||
|
||||
used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
|
||||
compressed = BP_GET_PSIZE(bp);
|
||||
@@ -134,10 +136,16 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
dsl_dataset_phys(ds)->ds_compressed_bytes += compressed;
|
||||
dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed;
|
||||
dsl_dataset_phys(ds)->ds_unique_bytes += used;
|
||||
|
||||
if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) {
|
||||
ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_BLOCKS] =
|
||||
B_TRUE;
|
||||
}
|
||||
|
||||
f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
|
||||
if (f != SPA_FEATURE_NONE)
|
||||
ds->ds_feature_activation_needed[f] = B_TRUE;
|
||||
|
||||
mutex_exit(&ds->ds_lock);
|
||||
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
|
||||
compressed, uncompressed, tx);
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2013 Saso Kiselkov. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/edonr.h>
|
||||
#include <sys/zfs_context.h> /* For CTASSERT() */
|
||||
|
||||
#define EDONR_MODE 512
|
||||
#define EDONR_BLOCK_SIZE EdonR512_BLOCK_SIZE
|
||||
|
||||
/*
|
||||
* Native zio_checksum interface for the Edon-R hash function.
|
||||
*/
|
||||
/*ARGSUSED*/
|
||||
void
|
||||
zio_checksum_edonr_native(const void *buf, uint64_t size,
|
||||
const void *ctx_template, zio_cksum_t *zcp)
|
||||
{
|
||||
uint8_t digest[EDONR_MODE / 8];
|
||||
EdonRState ctx;
|
||||
|
||||
ASSERT(ctx_template != NULL);
|
||||
bcopy(ctx_template, &ctx, sizeof (ctx));
|
||||
EdonRUpdate(&ctx, buf, size * 8);
|
||||
EdonRFinal(&ctx, digest);
|
||||
bcopy(digest, zcp->zc_word, sizeof (zcp->zc_word));
|
||||
}
|
||||
|
||||
/*
|
||||
* Byteswapped zio_checksum interface for the Edon-R hash function.
|
||||
*/
|
||||
void
|
||||
zio_checksum_edonr_byteswap(const void *buf, uint64_t size,
|
||||
const void *ctx_template, zio_cksum_t *zcp)
|
||||
{
|
||||
zio_cksum_t tmp;
|
||||
|
||||
zio_checksum_edonr_native(buf, size, ctx_template, &tmp);
|
||||
zcp->zc_word[0] = BSWAP_64(zcp->zc_word[0]);
|
||||
zcp->zc_word[1] = BSWAP_64(zcp->zc_word[1]);
|
||||
zcp->zc_word[2] = BSWAP_64(zcp->zc_word[2]);
|
||||
zcp->zc_word[3] = BSWAP_64(zcp->zc_word[3]);
|
||||
}
|
||||
|
||||
void *
|
||||
zio_checksum_edonr_tmpl_init(const zio_cksum_salt_t *salt)
|
||||
{
|
||||
EdonRState *ctx;
|
||||
uint8_t salt_block[EDONR_BLOCK_SIZE];
|
||||
|
||||
/*
|
||||
* Edon-R needs all but the last hash invocation to be on full-size
|
||||
* blocks, but the salt is too small. Rather than simply padding it
|
||||
* with zeros, we expand the salt into a new salt block of proper
|
||||
* size by double-hashing it (the new salt block will be composed of
|
||||
* H(salt) || H(H(salt))).
|
||||
*/
|
||||
CTASSERT(EDONR_BLOCK_SIZE == 2 * (EDONR_MODE / 8));
|
||||
EdonRHash(EDONR_MODE, salt->zcs_bytes, sizeof (salt->zcs_bytes) * 8,
|
||||
salt_block);
|
||||
EdonRHash(EDONR_MODE, salt_block, EDONR_MODE, salt_block +
|
||||
EDONR_MODE / 8);
|
||||
|
||||
/*
|
||||
* Feed the new salt block into the hash function - this will serve
|
||||
* as our MAC key.
|
||||
*/
|
||||
ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP);
|
||||
EdonRInit(ctx, EDONR_MODE);
|
||||
EdonRUpdate(ctx, salt_block, sizeof (salt_block) * 8);
|
||||
return (ctx);
|
||||
}
|
||||
|
||||
void
|
||||
zio_checksum_edonr_tmpl_free(void *ctx_template)
|
||||
{
|
||||
EdonRState *ctx = ctx_template;
|
||||
|
||||
bzero(ctx, sizeof (*ctx));
|
||||
kmem_free(ctx, sizeof (*ctx));
|
||||
}
|
||||
+50
-96
@@ -19,110 +19,64 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2013 Saso Kiselkov. All rights reserved.
|
||||
*/
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/sha2.h>
|
||||
|
||||
/*
|
||||
* SHA-256 checksum, as specified in FIPS 180-3, available at:
|
||||
* http://csrc.nist.gov/publications/PubsFIPS.html
|
||||
*
|
||||
* This is a very compact implementation of SHA-256.
|
||||
* It is designed to be simple and portable, not to be fast.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The literal definitions of Ch() and Maj() according to FIPS 180-3 are:
|
||||
*
|
||||
* Ch(x, y, z) (x & y) ^ (~x & z)
|
||||
* Maj(x, y, z) (x & y) ^ (x & z) ^ (y & z)
|
||||
*
|
||||
* We use equivalent logical reductions here that require one less op.
|
||||
*/
|
||||
#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
|
||||
#define Maj(x, y, z) (((x) & (y)) ^ ((z) & ((x) ^ (y))))
|
||||
#define Rot32(x, s) (((x) >> s) | ((x) << (32 - s)))
|
||||
#define SIGMA0(x) (Rot32(x, 2) ^ Rot32(x, 13) ^ Rot32(x, 22))
|
||||
#define SIGMA1(x) (Rot32(x, 6) ^ Rot32(x, 11) ^ Rot32(x, 25))
|
||||
#define sigma0(x) (Rot32(x, 7) ^ Rot32(x, 18) ^ ((x) >> 3))
|
||||
#define sigma1(x) (Rot32(x, 17) ^ Rot32(x, 19) ^ ((x) >> 10))
|
||||
|
||||
static const uint32_t SHA256_K[64] = {
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
};
|
||||
|
||||
static void
|
||||
SHA256Transform(uint32_t *H, const uint8_t *cp)
|
||||
{
|
||||
uint32_t a, b, c, d, e, f, g, h, t, T1, T2, W[64];
|
||||
|
||||
for (t = 0; t < 16; t++, cp += 4)
|
||||
W[t] = ((uint32_t)cp[0] << 24) | ((uint32_t)cp[1] << 16) |
|
||||
((uint32_t)cp[2] << 8) | (uint32_t)cp[3];
|
||||
|
||||
for (t = 16; t < 64; t++)
|
||||
W[t] = sigma1(W[t - 2]) + W[t - 7] +
|
||||
sigma0(W[t - 15]) + W[t - 16];
|
||||
|
||||
a = H[0]; b = H[1]; c = H[2]; d = H[3];
|
||||
e = H[4]; f = H[5]; g = H[6]; h = H[7];
|
||||
|
||||
for (t = 0; t < 64; t++) {
|
||||
T1 = h + SIGMA1(e) + Ch(e, f, g) + SHA256_K[t] + W[t];
|
||||
T2 = SIGMA0(a) + Maj(a, b, c);
|
||||
h = g; g = f; f = e; e = d + T1;
|
||||
d = c; c = b; b = a; a = T1 + T2;
|
||||
}
|
||||
|
||||
H[0] += a; H[1] += b; H[2] += c; H[3] += d;
|
||||
H[4] += e; H[5] += f; H[6] += g; H[7] += h;
|
||||
}
|
||||
|
||||
/*ARGSUSED*/
|
||||
void
|
||||
zio_checksum_SHA256(const void *buf, uint64_t size, zio_cksum_t *zcp)
|
||||
zio_checksum_SHA256(const void *buf, uint64_t size,
|
||||
const void *ctx_template, zio_cksum_t *zcp)
|
||||
{
|
||||
uint32_t H[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
|
||||
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
|
||||
uint8_t pad[128];
|
||||
int i, padsize;
|
||||
SHA2_CTX ctx;
|
||||
zio_cksum_t tmp;
|
||||
|
||||
for (i = 0; i < (size & ~63ULL); i += 64)
|
||||
SHA256Transform(H, (uint8_t *)buf + i);
|
||||
SHA2Init(SHA256, &ctx);
|
||||
SHA2Update(&ctx, buf, size);
|
||||
SHA2Final(&tmp, &ctx);
|
||||
|
||||
for (padsize = 0; i < size; i++)
|
||||
pad[padsize++] = *((uint8_t *)buf + i);
|
||||
|
||||
for (pad[padsize++] = 0x80; (padsize & 63) != 56; padsize++)
|
||||
pad[padsize] = 0;
|
||||
|
||||
for (i = 56; i >= 0; i -= 8)
|
||||
pad[padsize++] = (size << 3) >> i;
|
||||
|
||||
for (i = 0; i < padsize; i += 64)
|
||||
SHA256Transform(H, pad + i);
|
||||
|
||||
ZIO_SET_CHECKSUM(zcp,
|
||||
(uint64_t)H[0] << 32 | H[1],
|
||||
(uint64_t)H[2] << 32 | H[3],
|
||||
(uint64_t)H[4] << 32 | H[5],
|
||||
(uint64_t)H[6] << 32 | H[7]);
|
||||
/*
|
||||
* A prior implementation of this function had a
|
||||
* private SHA256 implementation always wrote things out in
|
||||
* Big Endian and there wasn't a byteswap variant of it.
|
||||
* To preserve on-disk compatibility we need to force that
|
||||
* behaviour.
|
||||
*/
|
||||
zcp->zc_word[0] = BE_64(tmp.zc_word[0]);
|
||||
zcp->zc_word[1] = BE_64(tmp.zc_word[1]);
|
||||
zcp->zc_word[2] = BE_64(tmp.zc_word[2]);
|
||||
zcp->zc_word[3] = BE_64(tmp.zc_word[3]);
|
||||
}
|
||||
|
||||
/*ARGSUSED*/
|
||||
void
|
||||
zio_checksum_SHA512_native(const void *buf, uint64_t size,
|
||||
const void *ctx_template, zio_cksum_t *zcp)
|
||||
{
|
||||
SHA2_CTX ctx;
|
||||
|
||||
SHA2Init(SHA512_256, &ctx);
|
||||
SHA2Update(&ctx, buf, size);
|
||||
SHA2Final(zcp, &ctx);
|
||||
}
|
||||
|
||||
/*ARGSUSED*/
|
||||
void
|
||||
zio_checksum_SHA512_byteswap(const void *buf, uint64_t size,
|
||||
const void *ctx_template, zio_cksum_t *zcp)
|
||||
{
|
||||
zio_cksum_t tmp;
|
||||
|
||||
zio_checksum_SHA512_native(buf, size, ctx_template, &tmp);
|
||||
zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]);
|
||||
zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]);
|
||||
zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]);
|
||||
zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2013 Saso Kiselkov. All rights reserved.
|
||||
*/
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/skein.h>
|
||||
|
||||
/*
|
||||
* Computes a native 256-bit skein MAC checksum. Please note that this
|
||||
* function requires the presence of a ctx_template that should be allocated
|
||||
* using zio_checksum_skein_tmpl_init.
|
||||
*/
|
||||
/*ARGSUSED*/
|
||||
void
|
||||
zio_checksum_skein_native(const void *buf, uint64_t size,
|
||||
const void *ctx_template, zio_cksum_t *zcp)
|
||||
{
|
||||
Skein_512_Ctxt_t ctx;
|
||||
|
||||
ASSERT(ctx_template != NULL);
|
||||
bcopy(ctx_template, &ctx, sizeof (ctx));
|
||||
(void) Skein_512_Update(&ctx, buf, size);
|
||||
(void) Skein_512_Final(&ctx, (uint8_t *)zcp);
|
||||
bzero(&ctx, sizeof (ctx));
|
||||
}
|
||||
|
||||
/*
|
||||
* Byteswapped version of zio_checksum_skein_native. This just invokes
|
||||
* the native checksum function and byteswaps the resulting checksum (since
|
||||
* skein is internally endian-insensitive).
|
||||
*/
|
||||
void
|
||||
zio_checksum_skein_byteswap(const void *buf, uint64_t size,
|
||||
const void *ctx_template, zio_cksum_t *zcp)
|
||||
{
|
||||
zio_cksum_t tmp;
|
||||
|
||||
zio_checksum_skein_native(buf, size, ctx_template, &tmp);
|
||||
zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]);
|
||||
zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]);
|
||||
zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]);
|
||||
zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocates a skein MAC template suitable for using in skein MAC checksum
|
||||
* computations and returns a pointer to it.
|
||||
*/
|
||||
void *
|
||||
zio_checksum_skein_tmpl_init(const zio_cksum_salt_t *salt)
|
||||
{
|
||||
Skein_512_Ctxt_t *ctx;
|
||||
|
||||
ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP);
|
||||
(void) Skein_512_InitExt(ctx, sizeof (zio_cksum_t) * 8, 0,
|
||||
salt->zcs_bytes, sizeof (salt->zcs_bytes));
|
||||
return (ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Frees a skein context template previously allocated using
|
||||
* zio_checksum_skein_tmpl_init.
|
||||
*/
|
||||
void
|
||||
zio_checksum_skein_tmpl_free(void *ctx_template)
|
||||
{
|
||||
Skein_512_Ctxt_t *ctx = ctx_template;
|
||||
|
||||
bzero(ctx, sizeof (*ctx));
|
||||
kmem_free(ctx, sizeof (*ctx));
|
||||
}
|
||||
@@ -25,6 +25,7 @@
|
||||
* Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
* Copyright 2013 Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
@@ -2675,6 +2676,19 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
||||
return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE));
|
||||
}
|
||||
|
||||
/* Grab the checksum salt from the MOS. */
|
||||
error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_CHECKSUM_SALT, 1,
|
||||
sizeof (spa->spa_cksum_salt.zcs_bytes),
|
||||
spa->spa_cksum_salt.zcs_bytes);
|
||||
if (error == ENOENT) {
|
||||
/* Generate a new salt for subsequent use */
|
||||
(void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes,
|
||||
sizeof (spa->spa_cksum_salt.zcs_bytes));
|
||||
} else if (error != 0) {
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
|
||||
}
|
||||
|
||||
if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0)
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
|
||||
error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj);
|
||||
@@ -3929,6 +3943,12 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||
if (version >= SPA_VERSION_ZPOOL_HISTORY)
|
||||
spa_history_create_obj(spa, tx);
|
||||
|
||||
/*
|
||||
* Generate some random noise for salted checksums to operate on.
|
||||
*/
|
||||
(void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes,
|
||||
sizeof (spa->spa_cksum_salt.zcs_bytes));
|
||||
|
||||
/*
|
||||
* Set pool properties.
|
||||
*/
|
||||
@@ -6406,6 +6426,20 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
|
||||
if (lz4_en && !lz4_ac)
|
||||
spa_feature_incr(spa, SPA_FEATURE_LZ4_COMPRESS, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we haven't written the salt, do so now. Note that the
|
||||
* feature may not be activated yet, but that's fine since
|
||||
* the presence of this ZAP entry is backwards compatible.
|
||||
*/
|
||||
if (zap_contains(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_CHECKSUM_SALT) == ENOENT) {
|
||||
VERIFY0(zap_add(spa->spa_meta_objset,
|
||||
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1,
|
||||
sizeof (spa->spa_cksum_salt.zcs_bytes),
|
||||
spa->spa_cksum_salt.zcs_bytes, tx));
|
||||
}
|
||||
|
||||
rrw_exit(&dp->dp_config_rwlock, FTAG);
|
||||
}
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
|
||||
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
* Copyright 2013 Saso Kiselkov. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@@ -53,7 +54,7 @@
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/kstat.h>
|
||||
#include "zfs_prop.h"
|
||||
#include "zfeature_common.h"
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
/*
|
||||
* SPA locking
|
||||
@@ -558,6 +559,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
|
||||
mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&spa->spa_cksum_tmpls_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
@@ -686,6 +688,8 @@ spa_remove(spa_t *spa)
|
||||
for (t = 0; t < TXG_SIZE; t++)
|
||||
bplist_destroy(&spa->spa_free_bplist[t]);
|
||||
|
||||
zio_checksum_templates_free(spa);
|
||||
|
||||
cv_destroy(&spa->spa_async_cv);
|
||||
cv_destroy(&spa->spa_evicting_os_cv);
|
||||
cv_destroy(&spa->spa_proc_cv);
|
||||
@@ -699,6 +703,7 @@ spa_remove(spa_t *spa)
|
||||
mutex_destroy(&spa->spa_history_lock);
|
||||
mutex_destroy(&spa->spa_proc_lock);
|
||||
mutex_destroy(&spa->spa_props_lock);
|
||||
mutex_destroy(&spa->spa_cksum_tmpls_lock);
|
||||
mutex_destroy(&spa->spa_scrub_lock);
|
||||
mutex_destroy(&spa->spa_suspend_lock);
|
||||
mutex_destroy(&spa->spa_vdev_top_lock);
|
||||
|
||||
@@ -1604,6 +1604,13 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
|
||||
int c, ret = 0;
|
||||
raidz_col_t *rc;
|
||||
|
||||
blkptr_t *bp = zio->io_bp;
|
||||
enum zio_checksum checksum = (bp == NULL ? zio->io_prop.zp_checksum :
|
||||
(BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
|
||||
|
||||
if (checksum == ZIO_CHECKSUM_NOPARITY)
|
||||
return (ret);
|
||||
|
||||
for (c = 0; c < rm->rm_firstdatacol; c++) {
|
||||
rc = &rm->rm_col[c];
|
||||
if (!rc->rc_tried || rc->rc_error != 0)
|
||||
|
||||
@@ -253,4 +253,16 @@ zpool_feature_init(void)
|
||||
"Variable on-disk size of dnodes.",
|
||||
ZFEATURE_FLAG_PER_DATASET, large_dnode_deps);
|
||||
}
|
||||
zfeature_register(SPA_FEATURE_SHA512,
|
||||
"org.illumos:sha512", "sha512",
|
||||
"SHA-512/256 hash algorithm.",
|
||||
ZFEATURE_FLAG_PER_DATASET, NULL);
|
||||
zfeature_register(SPA_FEATURE_SKEIN,
|
||||
"org.illumos:skein", "skein",
|
||||
"Skein hash algorithm.",
|
||||
ZFEATURE_FLAG_PER_DATASET, NULL);
|
||||
zfeature_register(SPA_FEATURE_EDONR,
|
||||
"org.illumos:edonr", "edonr",
|
||||
"Edon-R hash algorithm.",
|
||||
ZFEATURE_FLAG_PER_DATASET, NULL);
|
||||
}
|
||||
|
||||
+42
-5
@@ -185,6 +185,7 @@
|
||||
#include <sys/dsl_bookmark.h>
|
||||
#include <sys/dsl_userhold.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/slab.h>
|
||||
@@ -3809,11 +3810,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
break;
|
||||
|
||||
case ZFS_PROP_DEDUP:
|
||||
if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
break;
|
||||
|
||||
case ZFS_PROP_VOLBLOCKSIZE:
|
||||
case ZFS_PROP_RECORDSIZE:
|
||||
/* Record sizes above 128k need the feature to be enabled */
|
||||
@@ -3893,6 +3889,47 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
break;
|
||||
case ZFS_PROP_CHECKSUM:
|
||||
case ZFS_PROP_DEDUP:
|
||||
{
|
||||
spa_feature_t feature;
|
||||
spa_t *spa;
|
||||
uint64_t intval;
|
||||
int err;
|
||||
|
||||
/* dedup feature version checks */
|
||||
if (prop == ZFS_PROP_DEDUP &&
|
||||
zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
|
||||
if (nvpair_value_uint64(pair, &intval) != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
/* check prop value is enabled in features */
|
||||
feature = zio_checksum_to_feature(intval);
|
||||
if (feature == SPA_FEATURE_NONE)
|
||||
break;
|
||||
|
||||
if ((err = spa_open(dsname, &spa, FTAG)) != 0)
|
||||
return (err);
|
||||
/*
|
||||
* Salted checksums are not supported on root pools.
|
||||
*/
|
||||
if (spa_bootfs(spa) != 0 &&
|
||||
intval < ZIO_CHECKSUM_FUNCTIONS &&
|
||||
(zio_checksum_table[intval].ci_flags &
|
||||
ZCHECKSUM_FLAG_SALTED)) {
|
||||
spa_close(spa, FTAG);
|
||||
return (SET_ERROR(ERANGE));
|
||||
}
|
||||
if (!spa_feature_is_enabled(spa, feature)) {
|
||||
spa_close(spa, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
spa_close(spa, FTAG);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
+25
-12
@@ -979,7 +979,7 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
|
||||
|
||||
zio->io_prop.zp_checksum = checksum;
|
||||
|
||||
if (zio_checksum_table[checksum].ci_eck) {
|
||||
if (zio_checksum_table[checksum].ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
|
||||
/*
|
||||
* zec checksums are necessarily destructive -- they modify
|
||||
* the end of the write buffer to hold the verifier/checksum.
|
||||
@@ -1190,8 +1190,8 @@ zio_write_bp_init(zio_t *zio)
|
||||
if (BP_IS_HOLE(bp) || !zp->zp_dedup)
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
|
||||
ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup ||
|
||||
zp->zp_dedup_verify);
|
||||
ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags &
|
||||
ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify);
|
||||
|
||||
if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) {
|
||||
BP_SET_DEDUP(bp, 1);
|
||||
@@ -2198,12 +2198,22 @@ zio_write_gang_block(zio_t *pio)
|
||||
}
|
||||
|
||||
/*
|
||||
* The zio_nop_write stage in the pipeline determines if allocating
|
||||
* a new bp is necessary. By leveraging a cryptographically secure checksum,
|
||||
* such as SHA256, we can compare the checksums of the new data and the old
|
||||
* to determine if allocating a new block is required. The nopwrite
|
||||
* feature can handle writes in either syncing or open context (i.e. zil
|
||||
* writes) and as a result is mutually exclusive with dedup.
|
||||
* The zio_nop_write stage in the pipeline determines if allocating a
|
||||
* new bp is necessary. The nopwrite feature can handle writes in
|
||||
* either syncing or open context (i.e. zil writes) and as a result is
|
||||
* mutually exclusive with dedup.
|
||||
*
|
||||
* By leveraging a cryptographically secure checksum, such as SHA256, we
|
||||
* can compare the checksums of the new data and the old to determine if
|
||||
* allocating a new block is required. Note that our requirements for
|
||||
* cryptographic strength are fairly weak: there can't be any accidental
|
||||
* hash collisions, but we don't need to be secure against intentional
|
||||
* (malicious) collisions. To trigger a nopwrite, you have to be able
|
||||
* to write the file to begin with, and triggering an incorrect (hash
|
||||
* collision) nopwrite is no worse than simply writing to the file.
|
||||
* That said, there are no known attacks against the checksum algorithms
|
||||
* used for nopwrite, assuming that the salt and the checksums
|
||||
* themselves remain secret.
|
||||
*/
|
||||
static int
|
||||
zio_nop_write(zio_t *zio)
|
||||
@@ -2226,7 +2236,8 @@ zio_nop_write(zio_t *zio)
|
||||
* allocate a new bp.
|
||||
*/
|
||||
if (BP_IS_HOLE(bp_orig) ||
|
||||
!zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_dedup ||
|
||||
!(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags &
|
||||
ZCHECKSUM_FLAG_NOPWRITE) ||
|
||||
BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) ||
|
||||
BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) ||
|
||||
BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) ||
|
||||
@@ -2238,7 +2249,8 @@ zio_nop_write(zio_t *zio)
|
||||
* avoid allocating a new bp and issuing any I/O.
|
||||
*/
|
||||
if (ZIO_CHECKSUM_EQUAL(bp->blk_cksum, bp_orig->blk_cksum)) {
|
||||
ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup);
|
||||
ASSERT(zio_checksum_table[zp->zp_checksum].ci_flags &
|
||||
ZCHECKSUM_FLAG_NOPWRITE);
|
||||
ASSERT3U(BP_GET_PSIZE(bp), ==, BP_GET_PSIZE(bp_orig));
|
||||
ASSERT3U(BP_GET_LSIZE(bp), ==, BP_GET_LSIZE(bp_orig));
|
||||
ASSERT(zp->zp_compress != ZIO_COMPRESS_OFF);
|
||||
@@ -2566,7 +2578,8 @@ zio_ddt_write(zio_t *zio)
|
||||
* we can't resolve it, so just convert to an ordinary write.
|
||||
* (And automatically e-mail a paper to Nature?)
|
||||
*/
|
||||
if (!zio_checksum_table[zp->zp_checksum].ci_dedup) {
|
||||
if (!(zio_checksum_table[zp->zp_checksum].ci_flags &
|
||||
ZCHECKSUM_FLAG_DEDUP)) {
|
||||
zp->zp_checksum = spa_dedup_checksum(spa);
|
||||
zio_pop_transforms(zio);
|
||||
zio->io_stage = ZIO_STAGE_OPEN;
|
||||
|
||||
+144
-19
@@ -21,10 +21,12 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright 2013 Saso Kiselkov. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/zil.h>
|
||||
@@ -58,28 +60,96 @@
|
||||
* checksum function of the appropriate strength. When reading a block,
|
||||
* we compare the expected checksum against the actual checksum, which we
|
||||
* compute via the checksum function specified by BP_GET_CHECKSUM(bp).
|
||||
*
|
||||
* SALTED CHECKSUMS
|
||||
*
|
||||
* To enable the use of less secure hash algorithms with dedup, we
|
||||
* introduce the notion of salted checksums (MACs, really). A salted
|
||||
* checksum is fed both a random 256-bit value (the salt) and the data
|
||||
* to be checksummed. This salt is kept secret (stored on the pool, but
|
||||
* never shown to the user). Thus even if an attacker knew of collision
|
||||
* weaknesses in the hash algorithm, they won't be able to mount a known
|
||||
* plaintext attack on the DDT, since the actual hash value cannot be
|
||||
* known ahead of time. How the salt is used is algorithm-specific
|
||||
* (some might simply prefix it to the data block, others might need to
|
||||
* utilize a full-blown HMAC). On disk the salt is stored in a ZAP
|
||||
* object in the MOS (DMU_POOL_CHECKSUM_SALT).
|
||||
*
|
||||
* CONTEXT TEMPLATES
|
||||
*
|
||||
* Some hashing algorithms need to perform a substantial amount of
|
||||
* initialization work (e.g. salted checksums above may need to pre-hash
|
||||
* the salt) before being able to process data. Performing this
|
||||
* redundant work for each block would be wasteful, so we instead allow
|
||||
* a checksum algorithm to do the work once (the first time it's used)
|
||||
* and then keep this pre-initialized context as a template inside the
|
||||
* spa_t (spa_cksum_tmpls). If the zio_checksum_info_t contains
|
||||
* non-NULL ci_tmpl_init and ci_tmpl_free callbacks, they are used to
|
||||
* construct and destruct the pre-initialized checksum context. The
|
||||
* pre-initialized context is then reused during each checksum
|
||||
* invocation and passed to the checksum function.
|
||||
*/
|
||||
|
||||
/*ARGSUSED*/
|
||||
static void
|
||||
zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
|
||||
zio_checksum_off(const void *buf, uint64_t size,
|
||||
const void *ctx_template, zio_cksum_t *zcp)
|
||||
{
|
||||
ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
|
||||
{{NULL, NULL}, 0, 0, 0, "inherit"},
|
||||
{{NULL, NULL}, 0, 0, 0, "on"},
|
||||
{{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "off"},
|
||||
{{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "label"},
|
||||
{{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "gang_header"},
|
||||
{{fletcher_2_native, fletcher_2_byteswap}, 0, 1, 0, "zilog"},
|
||||
{{fletcher_2_native, fletcher_2_byteswap}, 0, 0, 0, "fletcher2"},
|
||||
{{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"},
|
||||
{{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"},
|
||||
{{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"},
|
||||
{{NULL, NULL}, NULL, NULL, 0, "inherit"},
|
||||
{{NULL, NULL}, NULL, NULL, 0, "on"},
|
||||
{{zio_checksum_off, zio_checksum_off},
|
||||
NULL, NULL, 0, "off"},
|
||||
{{zio_checksum_SHA256, zio_checksum_SHA256},
|
||||
NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED,
|
||||
"label"},
|
||||
{{zio_checksum_SHA256, zio_checksum_SHA256},
|
||||
NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED,
|
||||
"gang_header"},
|
||||
{{fletcher_2_native, fletcher_2_byteswap},
|
||||
NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog"},
|
||||
{{fletcher_2_native, fletcher_2_byteswap},
|
||||
NULL, NULL, 0, "fletcher2"},
|
||||
{{fletcher_4_native, fletcher_4_byteswap},
|
||||
NULL, NULL, ZCHECKSUM_FLAG_METADATA, "fletcher4"},
|
||||
{{zio_checksum_SHA256, zio_checksum_SHA256},
|
||||
NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
|
||||
ZCHECKSUM_FLAG_NOPWRITE, "sha256"},
|
||||
{{fletcher_4_native, fletcher_4_byteswap},
|
||||
NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog2"},
|
||||
{{zio_checksum_off, zio_checksum_off},
|
||||
NULL, NULL, 0, "noparity"},
|
||||
{{zio_checksum_SHA512_native, zio_checksum_SHA512_byteswap},
|
||||
NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
|
||||
ZCHECKSUM_FLAG_NOPWRITE, "sha512"},
|
||||
{{zio_checksum_skein_native, zio_checksum_skein_byteswap},
|
||||
zio_checksum_skein_tmpl_init, zio_checksum_skein_tmpl_free,
|
||||
ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
|
||||
ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "skein"},
|
||||
{{zio_checksum_edonr_native, zio_checksum_edonr_byteswap},
|
||||
zio_checksum_edonr_tmpl_init, zio_checksum_edonr_tmpl_free,
|
||||
ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED |
|
||||
ZCHECKSUM_FLAG_NOPWRITE, "edonr"},
|
||||
};
|
||||
|
||||
spa_feature_t
|
||||
zio_checksum_to_feature(enum zio_checksum cksum)
|
||||
{
|
||||
switch (cksum) {
|
||||
case ZIO_CHECKSUM_SHA512:
|
||||
return (SPA_FEATURE_SHA512);
|
||||
case ZIO_CHECKSUM_SKEIN:
|
||||
return (SPA_FEATURE_SKEIN);
|
||||
case ZIO_CHECKSUM_EDONR:
|
||||
return (SPA_FEATURE_EDONR);
|
||||
default:
|
||||
return (SPA_FEATURE_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
enum zio_checksum
|
||||
zio_checksum_select(enum zio_checksum child, enum zio_checksum parent)
|
||||
{
|
||||
@@ -113,7 +183,8 @@ zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child,
|
||||
if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY))
|
||||
return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY);
|
||||
|
||||
ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup ||
|
||||
ASSERT((zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_flags &
|
||||
ZCHECKSUM_FLAG_DEDUP) ||
|
||||
(child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF);
|
||||
|
||||
return (child);
|
||||
@@ -145,6 +216,30 @@ zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset)
|
||||
ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Calls the template init function of a checksum which supports context
|
||||
* templates and installs the template into the spa_t.
|
||||
*/
|
||||
static void
|
||||
zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa)
|
||||
{
|
||||
zio_checksum_info_t *ci = &zio_checksum_table[checksum];
|
||||
|
||||
if (ci->ci_tmpl_init == NULL)
|
||||
return;
|
||||
if (spa->spa_cksum_tmpls[checksum] != NULL)
|
||||
return;
|
||||
|
||||
VERIFY(ci->ci_tmpl_free != NULL);
|
||||
mutex_enter(&spa->spa_cksum_tmpls_lock);
|
||||
if (spa->spa_cksum_tmpls[checksum] == NULL) {
|
||||
spa->spa_cksum_tmpls[checksum] =
|
||||
ci->ci_tmpl_init(&spa->spa_cksum_salt);
|
||||
VERIFY(spa->spa_cksum_tmpls[checksum] != NULL);
|
||||
}
|
||||
mutex_exit(&spa->spa_cksum_tmpls_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate the checksum.
|
||||
*/
|
||||
@@ -156,11 +251,14 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
|
||||
uint64_t offset = zio->io_offset;
|
||||
zio_checksum_info_t *ci = &zio_checksum_table[checksum];
|
||||
zio_cksum_t cksum;
|
||||
spa_t *spa = zio->io_spa;
|
||||
|
||||
ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS);
|
||||
ASSERT(ci->ci_func[0] != NULL);
|
||||
|
||||
if (ci->ci_eck) {
|
||||
zio_checksum_template_init(checksum, spa);
|
||||
|
||||
if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
|
||||
zio_eck_t *eck;
|
||||
|
||||
if (checksum == ZIO_CHECKSUM_ZILOG2) {
|
||||
@@ -179,10 +277,12 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
|
||||
else
|
||||
bp->blk_cksum = eck->zec_cksum;
|
||||
eck->zec_magic = ZEC_MAGIC;
|
||||
ci->ci_func[0](data, size, &cksum);
|
||||
ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum],
|
||||
&cksum);
|
||||
eck->zec_cksum = cksum;
|
||||
} else {
|
||||
ci->ci_func[0](data, size, &bp->blk_cksum);
|
||||
ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum],
|
||||
&bp->blk_cksum);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -191,13 +291,15 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
|
||||
void *data, uint64_t size, uint64_t offset, zio_bad_cksum_t *info)
|
||||
{
|
||||
zio_checksum_info_t *ci = &zio_checksum_table[checksum];
|
||||
zio_cksum_t actual_cksum, expected_cksum;
|
||||
int byteswap;
|
||||
zio_cksum_t actual_cksum, expected_cksum;
|
||||
|
||||
if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
if (ci->ci_eck) {
|
||||
zio_checksum_template_init(checksum, spa);
|
||||
|
||||
if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
|
||||
zio_eck_t *eck;
|
||||
zio_cksum_t verifier;
|
||||
|
||||
@@ -235,7 +337,8 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
|
||||
|
||||
expected_cksum = eck->zec_cksum;
|
||||
eck->zec_cksum = verifier;
|
||||
ci->ci_func[byteswap](data, size, &actual_cksum);
|
||||
ci->ci_func[byteswap](data, size,
|
||||
spa->spa_cksum_tmpls[checksum], &actual_cksum);
|
||||
eck->zec_cksum = expected_cksum;
|
||||
|
||||
if (byteswap) {
|
||||
@@ -245,7 +348,8 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
|
||||
} else {
|
||||
byteswap = BP_SHOULD_BYTESWAP(bp);
|
||||
expected_cksum = bp->blk_cksum;
|
||||
ci->ci_func[byteswap](data, size, &actual_cksum);
|
||||
ci->ci_func[byteswap](data, size,
|
||||
spa->spa_cksum_tmpls[checksum], &actual_cksum);
|
||||
}
|
||||
|
||||
if (info != NULL) {
|
||||
@@ -286,3 +390,24 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called by a spa_t that's about to be deallocated. This steps through
|
||||
* all of the checksum context templates and deallocates any that were
|
||||
* initialized using the algorithm-specific template init function.
|
||||
*/
|
||||
void
|
||||
zio_checksum_templates_free(spa_t *spa)
|
||||
{
|
||||
enum zio_checksum checksum;
|
||||
for (checksum = 0; checksum < ZIO_CHECKSUM_FUNCTIONS;
|
||||
checksum++) {
|
||||
if (spa->spa_cksum_tmpls[checksum] != NULL) {
|
||||
zio_checksum_info_t *ci = &zio_checksum_table[checksum];
|
||||
|
||||
VERIFY(ci->ci_tmpl_free != NULL);
|
||||
ci->ci_tmpl_free(spa->spa_cksum_tmpls[checksum]);
|
||||
spa->spa_cksum_tmpls[checksum] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user