diff --git a/module/zfs/arc.c b/module/zfs/arc.c index bf514f00f..ab2564841 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -2137,13 +2137,17 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, } /* - * Adjust encrypted and authenticated headers to accomodate the - * request if needed. + * Adjust encrypted and authenticated headers to accomodate + * the request if needed. Dnode blocks (ARC_FILL_IN_PLACE) are + * allowed to fail decryption due to keys not being loaded + * without being marked as an IO error. */ if (HDR_PROTECTED(hdr)) { error = arc_fill_hdr_crypt(hdr, hash_lock, spa, zb, !!(flags & ARC_FILL_NOAUTH)); - if (error != 0) { + if (error == EACCES && (flags & ARC_FILL_IN_PLACE) != 0) { + return (error); + } else if (error != 0) { if (hash_lock != NULL) mutex_enter(hash_lock); arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index e3738db52..0a8e6d08b 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1119,6 +1119,64 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, dbuf_rele_and_unlock(db, NULL, B_FALSE); } + +/* + * This function ensures that, when doing a decrypting read of a block, + * we make sure we have decrypted the dnode associated with it. We must do + * this so that we ensure we are fully authenticating the checksum-of-MACs + * tree from the root of the objset down to this block. Indirect blocks are + * always verified against their secure checksum-of-MACs assuming that the + * dnode containing them is correct. Now that we are doing a decrypting read, + * we can be sure that the key is loaded and verify that assumption. This is + * especially important considering that we always read encrypted dnode + * blocks as raw data (without verifying their MACs) to start, and + * decrypt / authenticate them when we need to read an encrypted bonus buffer. + */ +static int +dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags) +{ + int err = 0; + objset_t *os = db->db_objset; + arc_buf_t *dnode_abuf; + dnode_t *dn; + zbookmark_phys_t zb; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + + if (!os->os_encrypted || os->os_raw_receive || + (flags & DB_RF_NO_DECRYPT) != 0) + return (0); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + dnode_abuf = (dn->dn_dbuf != NULL) ? dn->dn_dbuf->db_buf : NULL; + + if (dnode_abuf == NULL || !arc_is_encrypted(dnode_abuf)) { + DB_DNODE_EXIT(db); + return (0); + } + + SET_BOOKMARK(&zb, dmu_objset_id(os), + DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid); + err = arc_untransform(dnode_abuf, os->os_spa, &zb, B_TRUE); + + /* + * An error code of EACCES tells us that the key is still not + * available. This is ok if we are only reading authenticated + * (and therefore non-encrypted) blocks. + */ + if (err == EACCES && ((db->db_blkid != DMU_BONUS_BLKID && + !DMU_OT_IS_ENCRYPTED(dn->dn_type)) || + (db->db_blkid == DMU_BONUS_BLKID && + !DMU_OT_IS_ENCRYPTED(dn->dn_bonustype)))) + err = 0; + + + DB_DNODE_EXIT(db); + + return (err); +} + static int dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) { @@ -1143,23 +1201,13 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) */ int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen); int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots); - arc_buf_t *dn_buf = (dn->dn_dbuf != NULL) ? - dn->dn_dbuf->db_buf : NULL; /* if the underlying dnode block is encrypted, decrypt it */ - if (dn_buf != NULL && dn->dn_objset->os_encrypted && - DMU_OT_IS_ENCRYPTED(dn->dn_bonustype) && - (flags & DB_RF_NO_DECRYPT) == 0 && - arc_is_encrypted(dn_buf)) { - SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset), - DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid); - err = arc_untransform(dn_buf, dn->dn_objset->os_spa, - &zb, B_TRUE); - if (err != 0) { - DB_DNODE_EXIT(db); - mutex_exit(&db->db_mtx); - return (err); - } + err = dbuf_read_verify_dnode_crypt(db, flags); + if (err != 0) { + DB_DNODE_EXIT(db); + mutex_exit(&db->db_mtx); + return (err); } ASSERT3U(bonuslen, <=, db->db.db_size); @@ -1215,17 +1263,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) return (0); } - DB_DNODE_EXIT(db); - - db->db_state = DB_READ; - mutex_exit(&db->db_mtx); - - if (DBUF_IS_L2CACHEABLE(db)) - aflags |= ARC_FLAG_L2CACHE; - - SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset), - db->db.db_object, db->db_level, db->db_blkid); - /* * All bps of an encrypted os should have the encryption bit set. * If this is not true it indicates tampering and we report an error. @@ -1234,11 +1271,31 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) spa_log_error(db->db_objset->os_spa, &zb); zfs_panic_recover("unencrypted block in encrypted " "object set %llu", dmu_objset_id(db->db_objset)); + DB_DNODE_EXIT(db); + mutex_exit(&db->db_mtx); return (SET_ERROR(EIO)); } + err = dbuf_read_verify_dnode_crypt(db, flags); + if (err != 0) { + DB_DNODE_EXIT(db); + mutex_exit(&db->db_mtx); + return (err); + } + + DB_DNODE_EXIT(db); + + db->db_state = DB_READ; + mutex_exit(&db->db_mtx); + + if (DBUF_IS_L2CACHEABLE(db)) + aflags |= ARC_FLAG_L2CACHE; + dbuf_add_ref(db, NULL); + SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset), + db->db.db_object, db->db_level, db->db_blkid); + zio_flags = (flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED; @@ -1358,14 +1415,22 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) spa_t *spa = dn->dn_objset->os_spa; /* - * If the arc buf is compressed or encrypted, we need to - * untransform it to read the data. This could happen during - * the "zfs receive" of a stream which is deduplicated and - * either raw or compressed. We do not need to do this if the - * caller wants raw encrypted data. + * Ensure that this block's dnode has been decrypted if + * the caller has requested decrypted data. */ - if (db->db_buf != NULL && (flags & DB_RF_NO_DECRYPT) == 0 && + err = dbuf_read_verify_dnode_crypt(db, flags); + + /* + * If the arc buf is compressed or encrypted and the caller + * requested uncompressed data, we need to untransform it + * before returning. We also call arc_untransform() on any + * unauthenticated blocks, which will verify their MAC if + * the key is now available. + */ + if (err == 0 && db->db_buf != NULL && + (flags & DB_RF_NO_DECRYPT) == 0 && (arc_is_encrypted(db->db_buf) || + arc_is_unauthenticated(db->db_buf) || arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) { zbookmark_phys_t zb; @@ -1376,7 +1441,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) dbuf_set_data(db, db->db_buf); } mutex_exit(&db->db_mtx); - if (prefetch) + if (err == 0 && prefetch) dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); if ((flags & DB_RF_HAVESTRUCT) == 0) rw_exit(&dn->dn_struct_rwlock); @@ -1943,6 +2008,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ddt_prefetch(os->os_spa, db->db_blkptr); if (db->db_level == 0) { + ASSERT(!db->db_objset->os_raw_receive || + dn->dn_maxblkid >= db->db_blkid); dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock); ASSERT(dn->dn_maxblkid >= db->db_blkid); } @@ -3806,8 +3873,10 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) if (db->db_level == 0) { mutex_enter(&dn->dn_mtx); if (db->db_blkid > dn->dn_phys->dn_maxblkid && - db->db_blkid != DMU_SPILL_BLKID) + db->db_blkid != DMU_SPILL_BLKID) { + ASSERT0(db->db_objset->os_raw_receive); dn->dn_phys->dn_maxblkid = db->db_blkid; + } mutex_exit(&dn->dn_mtx); if (dn->dn_type == DMU_OT_DNODE) { diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 044031e4f..830da26f8 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -367,7 +367,12 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks, } } - if (trunc) { + /* + * Do not truncate the maxblkid if we are performing a raw + * receive. The raw receive sets the mablkid manually and + * must not be overriden. + */ + if (trunc && !dn->dn_objset->os_raw_receive) { ASSERTV(uint64_t off); dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;