From 69830602de2d836013a91bd42cc8d36bbebb3aae Mon Sep 17 00:00:00 2001
From: Tom Caputi
Date: Thu, 28 Jun 2018 12:20:34 -0400
Subject: [PATCH] Raw receive fix and encrypted objset security fix

This patch fixes two problems with the encryption code. First, the
current code does not correctly prohibit the DMU from updating
dn_maxblkid during object truncation within a raw receive. This
usually only causes issues when the truncating DRR_FREE record is
aggregated with DRR_FREE records later in the receive, so it is
relatively hard to hit.

Second, this patch fixes a security issue where reading blocks within
an encrypted object did not guarantee that the dnode block itself had
ever been verified against its MAC. Usually the verification happened
anyway when the bonus buffer was read, but some use cases (notably
zvols) might never perform the check.

Reviewed-by: Brian Behlendorf
Reviewed-by: Matthew Ahrens
Signed-off-by: Tom Caputi
Closes #7632
---
 module/zfs/arc.c        |  10 ++-
 module/zfs/dbuf.c       | 137 ++++++++++++++++++++++++++++++----------
 module/zfs/dnode_sync.c |   7 +-
 3 files changed, 116 insertions(+), 38 deletions(-)

diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index bf514f00f..ab2564841 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -2137,13 +2137,17 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
 	}
 
 	/*
-	 * Adjust encrypted and authenticated headers to accomodate the
-	 * request if needed.
+	 * Adjust encrypted and authenticated headers to accommodate
+	 * the request if needed. Dnode blocks (ARC_FILL_IN_PLACE) are
+	 * allowed to fail decryption due to keys not being loaded
+	 * without being marked as an IO error.
 	 */
 	if (HDR_PROTECTED(hdr)) {
 		error = arc_fill_hdr_crypt(hdr, hash_lock, spa,
 		    zb, !!(flags & ARC_FILL_NOAUTH));
-		if (error != 0) {
+		if (error == EACCES && (flags & ARC_FILL_IN_PLACE) != 0) {
+			return (error);
+		} else if (error != 0) {
 			if (hash_lock != NULL)
 				mutex_enter(hash_lock);
 			arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR);
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index e3738db52..0a8e6d08b 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -1119,6 +1119,64 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
 	dbuf_rele_and_unlock(db, NULL, B_FALSE);
 }
 
+
+/*
+ * This function ensures that, when doing a decrypting read of a block,
+ * we make sure we have decrypted the dnode associated with it. We must do
+ * this so that we ensure we are fully authenticating the checksum-of-MACs
+ * tree from the root of the objset down to this block. Indirect blocks are
+ * always verified against their secure checksum-of-MACs assuming that the
+ * dnode containing them is correct. Now that we are doing a decrypting read,
+ * we can be sure that the key is loaded and verify that assumption. This is
+ * especially important considering that we always read encrypted dnode
+ * blocks as raw data (without verifying their MACs) to start, and
+ * decrypt / authenticate them when we need to read an encrypted bonus buffer.
+ */
+static int
+dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
+{
+	int err = 0;
+	objset_t *os = db->db_objset;
+	arc_buf_t *dnode_abuf;
+	dnode_t *dn;
+	zbookmark_phys_t zb;
+
+	ASSERT(MUTEX_HELD(&db->db_mtx));
+
+	if (!os->os_encrypted || os->os_raw_receive ||
+	    (flags & DB_RF_NO_DECRYPT) != 0)
+		return (0);
+
+	DB_DNODE_ENTER(db);
+	dn = DB_DNODE(db);
+	dnode_abuf = (dn->dn_dbuf != NULL) ?
+	    dn->dn_dbuf->db_buf : NULL;
+
+	if (dnode_abuf == NULL || !arc_is_encrypted(dnode_abuf)) {
+		DB_DNODE_EXIT(db);
+		return (0);
+	}
+
+	SET_BOOKMARK(&zb, dmu_objset_id(os),
+	    DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid);
+	err = arc_untransform(dnode_abuf, os->os_spa, &zb, B_TRUE);
+
+	/*
+	 * An error code of EACCES tells us that the key is still not
+	 * available. This is ok if we are only reading authenticated
+	 * (and therefore non-encrypted) blocks.
+	 */
+	if (err == EACCES && ((db->db_blkid != DMU_BONUS_BLKID &&
+	    !DMU_OT_IS_ENCRYPTED(dn->dn_type)) ||
+	    (db->db_blkid == DMU_BONUS_BLKID &&
+	    !DMU_OT_IS_ENCRYPTED(dn->dn_bonustype))))
+		err = 0;
+
+	DB_DNODE_EXIT(db);
+
+	return (err);
+}
+
 static int
 dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 {
@@ -1143,23 +1201,13 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 		 */
 		int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
 		int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
-		arc_buf_t *dn_buf = (dn->dn_dbuf != NULL) ?
-		    dn->dn_dbuf->db_buf : NULL;
 
 		/* if the underlying dnode block is encrypted, decrypt it */
-		if (dn_buf != NULL && dn->dn_objset->os_encrypted &&
-		    DMU_OT_IS_ENCRYPTED(dn->dn_bonustype) &&
-		    (flags & DB_RF_NO_DECRYPT) == 0 &&
-		    arc_is_encrypted(dn_buf)) {
-			SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
-			    DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid);
-			err = arc_untransform(dn_buf, dn->dn_objset->os_spa,
-			    &zb, B_TRUE);
-			if (err != 0) {
-				DB_DNODE_EXIT(db);
-				mutex_exit(&db->db_mtx);
-				return (err);
-			}
+		err = dbuf_read_verify_dnode_crypt(db, flags);
+		if (err != 0) {
+			DB_DNODE_EXIT(db);
+			mutex_exit(&db->db_mtx);
+			return (err);
 		}
 
 		ASSERT3U(bonuslen, <=, db->db.db_size);
@@ -1215,17 +1263,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 		return (0);
 	}
 
-	DB_DNODE_EXIT(db);
-
-	db->db_state = DB_READ;
-	mutex_exit(&db->db_mtx);
-
-	if (DBUF_IS_L2CACHEABLE(db))
-		aflags |= ARC_FLAG_L2CACHE;
-
-	SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
-	    db->db.db_object, db->db_level, db->db_blkid);
-
 	/*
 	 * All bps of an encrypted os should have the encryption bit set.
 	 * If this is not true it indicates tampering and we report an error.
@@ -1234,11 +1271,31 @@
 		spa_log_error(db->db_objset->os_spa, &zb);
 		zfs_panic_recover("unencrypted block in encrypted "
 		    "object set %llu", dmu_objset_id(db->db_objset));
+		DB_DNODE_EXIT(db);
+		mutex_exit(&db->db_mtx);
 		return (SET_ERROR(EIO));
 	}
 
+	err = dbuf_read_verify_dnode_crypt(db, flags);
+	if (err != 0) {
+		DB_DNODE_EXIT(db);
+		mutex_exit(&db->db_mtx);
+		return (err);
+	}
+
+	DB_DNODE_EXIT(db);
+
+	db->db_state = DB_READ;
+	mutex_exit(&db->db_mtx);
+
+	if (DBUF_IS_L2CACHEABLE(db))
+		aflags |= ARC_FLAG_L2CACHE;
+
 	dbuf_add_ref(db, NULL);
 
+	SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
+	    db->db.db_object, db->db_level, db->db_blkid);
+
 	zio_flags = (flags & DB_RF_CANFAIL) ?
 	    ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
 
@@ -1358,14 +1415,22 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 		spa_t *spa = dn->dn_objset->os_spa;
 
 		/*
-		 * If the arc buf is compressed or encrypted, we need to
-		 * untransform it to read the data. This could happen during
-		 * the "zfs receive" of a stream which is deduplicated and
-		 * either raw or compressed. We do not need to do this if the
-		 * caller wants raw encrypted data.
+		 * Ensure that this block's dnode has been decrypted if
+		 * the caller has requested decrypted data.
 		 */
-		if (db->db_buf != NULL && (flags & DB_RF_NO_DECRYPT) == 0 &&
+		err = dbuf_read_verify_dnode_crypt(db, flags);
+
+		/*
+		 * If the arc buf is compressed or encrypted and the caller
+		 * requested uncompressed data, we need to untransform it
+		 * before returning. We also call arc_untransform() on any
+		 * unauthenticated blocks, which will verify their MAC if
+		 * the key is now available.
+		 */
+		if (err == 0 && db->db_buf != NULL &&
+		    (flags & DB_RF_NO_DECRYPT) == 0 &&
 		    (arc_is_encrypted(db->db_buf) ||
+		    arc_is_unauthenticated(db->db_buf) ||
 		    arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
 			zbookmark_phys_t zb;
 
@@ -1376,7 +1441,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 			dbuf_set_data(db, db->db_buf);
 		}
 		mutex_exit(&db->db_mtx);
-		if (prefetch)
+		if (err == 0 && prefetch)
 			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
 		if ((flags & DB_RF_HAVESTRUCT) == 0)
 			rw_exit(&dn->dn_struct_rwlock);
@@ -1943,6 +2008,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 		ddt_prefetch(os->os_spa, db->db_blkptr);
 
 	if (db->db_level == 0) {
+		ASSERT(!db->db_objset->os_raw_receive ||
+		    dn->dn_maxblkid >= db->db_blkid);
 		dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock);
 		ASSERT(dn->dn_maxblkid >= db->db_blkid);
 	}
@@ -3806,8 +3873,10 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
 	if (db->db_level == 0) {
 		mutex_enter(&dn->dn_mtx);
 		if (db->db_blkid > dn->dn_phys->dn_maxblkid &&
-		    db->db_blkid != DMU_SPILL_BLKID)
+		    db->db_blkid != DMU_SPILL_BLKID) {
+			ASSERT0(db->db_objset->os_raw_receive);
 			dn->dn_phys->dn_maxblkid = db->db_blkid;
+		}
 		mutex_exit(&dn->dn_mtx);
 
 		if (dn->dn_type == DMU_OT_DNODE) {
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index 044031e4f..830da26f8 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -367,7 +367,12 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
 		}
 	}
 
-	if (trunc) {
+	/*
+	 * Do not truncate the maxblkid if we are performing a raw
+	 * receive. The raw receive sets the maxblkid manually, and
+	 * that value must not be overridden.
+	 */
+	if (trunc && !dn->dn_objset->os_raw_receive) {
 		ASSERTV(uint64_t off);
 		dn->dn_phys->dn_maxblkid =
 		    blkid == 0 ? 0 : blkid - 1;
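
For illustration, the dnode_sync.c hunk above guards the truncation of
dn_phys->dn_maxblkid behind !os_raw_receive, because a raw receive sets
maxblkid directly from the send stream and that value must survive the
aggregated free records. Below is a minimal stand-alone sketch of just
that decision; the toy_* structs and functions are hypothetical
stand-ins for this example only, not the real objset_t/dnode_t layouts.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-ins; not the real ZFS types. */
struct toy_objset {
	bool os_raw_receive;		/* raw receive in progress */
};

struct toy_dnode {
	struct toy_objset *dn_objset;
	uint64_t dn_maxblkid;		/* stand-in for dn_phys->dn_maxblkid */
};

/*
 * Model of the truncation guard: only lower maxblkid for a truncating
 * free when we are NOT inside a raw receive, since the raw receive has
 * already set maxblkid from the send stream.
 */
static void
toy_free_range(struct toy_dnode *dn, uint64_t blkid, bool trunc)
{
	if (trunc && !dn->dn_objset->os_raw_receive)
		dn->dn_maxblkid = (blkid == 0) ? 0 : blkid - 1;
}

int
main(void)
{
	struct toy_objset os = { .os_raw_receive = true };
	struct toy_dnode dn = { .dn_objset = &os, .dn_maxblkid = 100 };

	toy_free_range(&dn, 10, true);
	printf("raw receive: maxblkid = %llu\n",
	    (unsigned long long)dn.dn_maxblkid);	/* stays 100 */

	os.os_raw_receive = false;
	toy_free_range(&dn, 10, true);
	printf("normal free: maxblkid = %llu\n",
	    (unsigned long long)dn.dn_maxblkid);	/* drops to 9 */
	return (0);
}

The new assertions added to dbuf_dirty() and dbuf_write_ready() in the
dbuf.c hunks enforce the same invariant from the write path: during a
raw receive, dirtying and syncing blocks must never push dn_maxblkid
past the value the stream established.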