mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-27 11:29:36 +03:00
Raw receive fix and encrypted objset security fix
This patch fixes two problems with the encryption code. First, the current code does not correctly prohibit the DMU from updating dn_maxblkid during object truncation within a raw receive. This usually only causes issues when the truncating DRR_FREE record is aggregated with DRR_FREE records later in the receive, so it is relatively hard to hit. Second, this patch fixes a security issue where reading blocks within an encrypted object did not guarantee that the dnode block itself had ever been verified against its MAC. Usually the verification happened anyway when the bonus buffer was read, but some use cases (notably zvols) might never perform the check. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Signed-off-by: Tom Caputi <tcaputi@datto.com> Closes #7632
This commit is contained in:
parent
3be1eb29da
commit
69830602de
@ -2137,13 +2137,17 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Adjust encrypted and authenticated headers to accomodate the
|
* Adjust encrypted and authenticated headers to accomodate
|
||||||
* request if needed.
|
* the request if needed. Dnode blocks (ARC_FILL_IN_PLACE) are
|
||||||
|
* allowed to fail decryption due to keys not being loaded
|
||||||
|
* without being marked as an IO error.
|
||||||
*/
|
*/
|
||||||
if (HDR_PROTECTED(hdr)) {
|
if (HDR_PROTECTED(hdr)) {
|
||||||
error = arc_fill_hdr_crypt(hdr, hash_lock, spa,
|
error = arc_fill_hdr_crypt(hdr, hash_lock, spa,
|
||||||
zb, !!(flags & ARC_FILL_NOAUTH));
|
zb, !!(flags & ARC_FILL_NOAUTH));
|
||||||
if (error != 0) {
|
if (error == EACCES && (flags & ARC_FILL_IN_PLACE) != 0) {
|
||||||
|
return (error);
|
||||||
|
} else if (error != 0) {
|
||||||
if (hash_lock != NULL)
|
if (hash_lock != NULL)
|
||||||
mutex_enter(hash_lock);
|
mutex_enter(hash_lock);
|
||||||
arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR);
|
arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR);
|
||||||
|
@ -1119,6 +1119,64 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
|
|||||||
dbuf_rele_and_unlock(db, NULL, B_FALSE);
|
dbuf_rele_and_unlock(db, NULL, B_FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This function ensures that, when doing a decrypting read of a block,
|
||||||
|
* we make sure we have decrypted the dnode associated with it. We must do
|
||||||
|
* this so that we ensure we are fully authenticating the checksum-of-MACs
|
||||||
|
* tree from the root of the objset down to this block. Indirect blocks are
|
||||||
|
* always verified against their secure checksum-of-MACs assuming that the
|
||||||
|
* dnode containing them is correct. Now that we are doing a decrypting read,
|
||||||
|
* we can be sure that the key is loaded and verify that assumption. This is
|
||||||
|
* especially important considering that we always read encrypted dnode
|
||||||
|
* blocks as raw data (without verifying their MACs) to start, and
|
||||||
|
* decrypt / authenticate them when we need to read an encrypted bonus buffer.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
|
||||||
|
{
|
||||||
|
int err = 0;
|
||||||
|
objset_t *os = db->db_objset;
|
||||||
|
arc_buf_t *dnode_abuf;
|
||||||
|
dnode_t *dn;
|
||||||
|
zbookmark_phys_t zb;
|
||||||
|
|
||||||
|
ASSERT(MUTEX_HELD(&db->db_mtx));
|
||||||
|
|
||||||
|
if (!os->os_encrypted || os->os_raw_receive ||
|
||||||
|
(flags & DB_RF_NO_DECRYPT) != 0)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
DB_DNODE_ENTER(db);
|
||||||
|
dn = DB_DNODE(db);
|
||||||
|
dnode_abuf = (dn->dn_dbuf != NULL) ? dn->dn_dbuf->db_buf : NULL;
|
||||||
|
|
||||||
|
if (dnode_abuf == NULL || !arc_is_encrypted(dnode_abuf)) {
|
||||||
|
DB_DNODE_EXIT(db);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
SET_BOOKMARK(&zb, dmu_objset_id(os),
|
||||||
|
DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid);
|
||||||
|
err = arc_untransform(dnode_abuf, os->os_spa, &zb, B_TRUE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* An error code of EACCES tells us that the key is still not
|
||||||
|
* available. This is ok if we are only reading authenticated
|
||||||
|
* (and therefore non-encrypted) blocks.
|
||||||
|
*/
|
||||||
|
if (err == EACCES && ((db->db_blkid != DMU_BONUS_BLKID &&
|
||||||
|
!DMU_OT_IS_ENCRYPTED(dn->dn_type)) ||
|
||||||
|
(db->db_blkid == DMU_BONUS_BLKID &&
|
||||||
|
!DMU_OT_IS_ENCRYPTED(dn->dn_bonustype))))
|
||||||
|
err = 0;
|
||||||
|
|
||||||
|
|
||||||
|
DB_DNODE_EXIT(db);
|
||||||
|
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
||||||
{
|
{
|
||||||
@ -1143,24 +1201,14 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
|||||||
*/
|
*/
|
||||||
int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
|
int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
|
||||||
int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
|
int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
|
||||||
arc_buf_t *dn_buf = (dn->dn_dbuf != NULL) ?
|
|
||||||
dn->dn_dbuf->db_buf : NULL;
|
|
||||||
|
|
||||||
/* if the underlying dnode block is encrypted, decrypt it */
|
/* if the underlying dnode block is encrypted, decrypt it */
|
||||||
if (dn_buf != NULL && dn->dn_objset->os_encrypted &&
|
err = dbuf_read_verify_dnode_crypt(db, flags);
|
||||||
DMU_OT_IS_ENCRYPTED(dn->dn_bonustype) &&
|
|
||||||
(flags & DB_RF_NO_DECRYPT) == 0 &&
|
|
||||||
arc_is_encrypted(dn_buf)) {
|
|
||||||
SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
|
|
||||||
DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid);
|
|
||||||
err = arc_untransform(dn_buf, dn->dn_objset->os_spa,
|
|
||||||
&zb, B_TRUE);
|
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
DB_DNODE_EXIT(db);
|
DB_DNODE_EXIT(db);
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT3U(bonuslen, <=, db->db.db_size);
|
ASSERT3U(bonuslen, <=, db->db.db_size);
|
||||||
db->db.db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
|
db->db.db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
|
||||||
@ -1215,17 +1263,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
|||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
DB_DNODE_EXIT(db);
|
|
||||||
|
|
||||||
db->db_state = DB_READ;
|
|
||||||
mutex_exit(&db->db_mtx);
|
|
||||||
|
|
||||||
if (DBUF_IS_L2CACHEABLE(db))
|
|
||||||
aflags |= ARC_FLAG_L2CACHE;
|
|
||||||
|
|
||||||
SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
|
|
||||||
db->db.db_object, db->db_level, db->db_blkid);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* All bps of an encrypted os should have the encryption bit set.
|
* All bps of an encrypted os should have the encryption bit set.
|
||||||
* If this is not true it indicates tampering and we report an error.
|
* If this is not true it indicates tampering and we report an error.
|
||||||
@ -1234,11 +1271,31 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
|||||||
spa_log_error(db->db_objset->os_spa, &zb);
|
spa_log_error(db->db_objset->os_spa, &zb);
|
||||||
zfs_panic_recover("unencrypted block in encrypted "
|
zfs_panic_recover("unencrypted block in encrypted "
|
||||||
"object set %llu", dmu_objset_id(db->db_objset));
|
"object set %llu", dmu_objset_id(db->db_objset));
|
||||||
|
DB_DNODE_EXIT(db);
|
||||||
|
mutex_exit(&db->db_mtx);
|
||||||
return (SET_ERROR(EIO));
|
return (SET_ERROR(EIO));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
err = dbuf_read_verify_dnode_crypt(db, flags);
|
||||||
|
if (err != 0) {
|
||||||
|
DB_DNODE_EXIT(db);
|
||||||
|
mutex_exit(&db->db_mtx);
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
|
|
||||||
|
DB_DNODE_EXIT(db);
|
||||||
|
|
||||||
|
db->db_state = DB_READ;
|
||||||
|
mutex_exit(&db->db_mtx);
|
||||||
|
|
||||||
|
if (DBUF_IS_L2CACHEABLE(db))
|
||||||
|
aflags |= ARC_FLAG_L2CACHE;
|
||||||
|
|
||||||
dbuf_add_ref(db, NULL);
|
dbuf_add_ref(db, NULL);
|
||||||
|
|
||||||
|
SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
|
||||||
|
db->db.db_object, db->db_level, db->db_blkid);
|
||||||
|
|
||||||
zio_flags = (flags & DB_RF_CANFAIL) ?
|
zio_flags = (flags & DB_RF_CANFAIL) ?
|
||||||
ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
|
ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
|
||||||
|
|
||||||
@ -1358,14 +1415,22 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
|||||||
spa_t *spa = dn->dn_objset->os_spa;
|
spa_t *spa = dn->dn_objset->os_spa;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the arc buf is compressed or encrypted, we need to
|
* Ensure that this block's dnode has been decrypted if
|
||||||
* untransform it to read the data. This could happen during
|
* the caller has requested decrypted data.
|
||||||
* the "zfs receive" of a stream which is deduplicated and
|
|
||||||
* either raw or compressed. We do not need to do this if the
|
|
||||||
* caller wants raw encrypted data.
|
|
||||||
*/
|
*/
|
||||||
if (db->db_buf != NULL && (flags & DB_RF_NO_DECRYPT) == 0 &&
|
err = dbuf_read_verify_dnode_crypt(db, flags);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the arc buf is compressed or encrypted and the caller
|
||||||
|
* requested uncompressed data, we need to untransform it
|
||||||
|
* before returning. We also call arc_untransform() on any
|
||||||
|
* unauthenticated blocks, which will verify their MAC if
|
||||||
|
* the key is now available.
|
||||||
|
*/
|
||||||
|
if (err == 0 && db->db_buf != NULL &&
|
||||||
|
(flags & DB_RF_NO_DECRYPT) == 0 &&
|
||||||
(arc_is_encrypted(db->db_buf) ||
|
(arc_is_encrypted(db->db_buf) ||
|
||||||
|
arc_is_unauthenticated(db->db_buf) ||
|
||||||
arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
|
arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
|
||||||
zbookmark_phys_t zb;
|
zbookmark_phys_t zb;
|
||||||
|
|
||||||
@ -1376,7 +1441,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
|||||||
dbuf_set_data(db, db->db_buf);
|
dbuf_set_data(db, db->db_buf);
|
||||||
}
|
}
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
if (prefetch)
|
if (err == 0 && prefetch)
|
||||||
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
|
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
|
||||||
if ((flags & DB_RF_HAVESTRUCT) == 0)
|
if ((flags & DB_RF_HAVESTRUCT) == 0)
|
||||||
rw_exit(&dn->dn_struct_rwlock);
|
rw_exit(&dn->dn_struct_rwlock);
|
||||||
@ -1943,6 +2008,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||||||
ddt_prefetch(os->os_spa, db->db_blkptr);
|
ddt_prefetch(os->os_spa, db->db_blkptr);
|
||||||
|
|
||||||
if (db->db_level == 0) {
|
if (db->db_level == 0) {
|
||||||
|
ASSERT(!db->db_objset->os_raw_receive ||
|
||||||
|
dn->dn_maxblkid >= db->db_blkid);
|
||||||
dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock);
|
dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock);
|
||||||
ASSERT(dn->dn_maxblkid >= db->db_blkid);
|
ASSERT(dn->dn_maxblkid >= db->db_blkid);
|
||||||
}
|
}
|
||||||
@ -3806,8 +3873,10 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|||||||
if (db->db_level == 0) {
|
if (db->db_level == 0) {
|
||||||
mutex_enter(&dn->dn_mtx);
|
mutex_enter(&dn->dn_mtx);
|
||||||
if (db->db_blkid > dn->dn_phys->dn_maxblkid &&
|
if (db->db_blkid > dn->dn_phys->dn_maxblkid &&
|
||||||
db->db_blkid != DMU_SPILL_BLKID)
|
db->db_blkid != DMU_SPILL_BLKID) {
|
||||||
|
ASSERT0(db->db_objset->os_raw_receive);
|
||||||
dn->dn_phys->dn_maxblkid = db->db_blkid;
|
dn->dn_phys->dn_maxblkid = db->db_blkid;
|
||||||
|
}
|
||||||
mutex_exit(&dn->dn_mtx);
|
mutex_exit(&dn->dn_mtx);
|
||||||
|
|
||||||
if (dn->dn_type == DMU_OT_DNODE) {
|
if (dn->dn_type == DMU_OT_DNODE) {
|
||||||
|
@ -367,7 +367,12 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (trunc) {
|
/*
|
||||||
|
* Do not truncate the maxblkid if we are performing a raw
|
||||||
|
* receive. The raw receive sets the mablkid manually and
|
||||||
|
* must not be overriden.
|
||||||
|
*/
|
||||||
|
if (trunc && !dn->dn_objset->os_raw_receive) {
|
||||||
ASSERTV(uint64_t off);
|
ASSERTV(uint64_t off);
|
||||||
dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;
|
dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user