mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-27 03:19:35 +03:00
assertion in arc_release() during encrypted receive
In the existing code, when doing a raw (encrypted) zfs receive, we call arc_convert_to_raw() from open context. This creates a race condition between arc_release()/arc_change_state() and writing out the block from syncing context (arc_write_ready/done()). This change makes it so that when we are doing a raw (encrypted) zfs receive, we save the crypt parameters (salt, iv, mac) of dnode blocks in the dbuf_dirty_record_t, and call arc_convert_to_raw() from syncing context when writing out the block of dnodes. Additionally, we can eliminate dr_raw and associated setters, and instead know that dnode blocks are always raw when doing a zfs receive (see the new field os_raw_receive). Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Tom Caputi <tcaputi@datto.com> Signed-off-by: Matthew Ahrens <mahrens@delphix.com> Closes #7424 Closes #7429
This commit is contained in:
parent
b40d45bc6c
commit
0c03d21ac9
@ -147,7 +147,16 @@ typedef struct dbuf_dirty_record {
|
|||||||
override_states_t dr_override_state;
|
override_states_t dr_override_state;
|
||||||
uint8_t dr_copies;
|
uint8_t dr_copies;
|
||||||
boolean_t dr_nopwrite;
|
boolean_t dr_nopwrite;
|
||||||
boolean_t dr_raw;
|
boolean_t dr_has_raw_params;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If dr_has_raw_params is set, the following crypt
|
||||||
|
* params will be set on the BP that's written.
|
||||||
|
*/
|
||||||
|
boolean_t dr_byteorder;
|
||||||
|
uint8_t dr_salt[ZIO_DATA_SALT_LEN];
|
||||||
|
uint8_t dr_iv[ZIO_DATA_IV_LEN];
|
||||||
|
uint8_t dr_mac[ZIO_DATA_MAC_LEN];
|
||||||
} dl;
|
} dl;
|
||||||
} dt;
|
} dt;
|
||||||
} dbuf_dirty_record_t;
|
} dbuf_dirty_record_t;
|
||||||
|
@ -472,7 +472,6 @@ void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
|
|||||||
void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
|
void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
|
||||||
dmu_tx_t *tx);
|
dmu_tx_t *tx);
|
||||||
|
|
||||||
int dmu_object_dirty_raw(objset_t *os, uint64_t object, dmu_tx_t *tx);
|
|
||||||
|
|
||||||
int dmu_object_remap_indirects(objset_t *os, uint64_t object, uint64_t txg);
|
int dmu_object_remap_indirects(objset_t *os, uint64_t object, uint64_t txg);
|
||||||
|
|
||||||
@ -715,7 +714,8 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
|
|||||||
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
|
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
|
||||||
*/
|
*/
|
||||||
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
|
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
|
||||||
void dmu_buf_will_change_crypt_params(dmu_buf_t *db, dmu_tx_t *tx);
|
void dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
|
||||||
|
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* You must create a transaction, then hold the objects which you will
|
* You must create a transaction, then hold the objects which you will
|
||||||
@ -794,10 +794,7 @@ int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
|
|||||||
uint64_t size, dmu_tx_t *tx);
|
uint64_t size, dmu_tx_t *tx);
|
||||||
int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset,
|
int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
uint64_t size);
|
uint64_t size);
|
||||||
int dmu_free_long_range_raw(objset_t *os, uint64_t object, uint64_t offset,
|
|
||||||
uint64_t size);
|
|
||||||
int dmu_free_long_object(objset_t *os, uint64_t object);
|
int dmu_free_long_object(objset_t *os, uint64_t object);
|
||||||
int dmu_free_long_object_raw(objset_t *os, uint64_t object);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Convenience functions.
|
* Convenience functions.
|
||||||
@ -837,9 +834,6 @@ void dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset,
|
|||||||
void dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
|
void dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
|
||||||
struct arc_buf *buf, dmu_tx_t *tx);
|
struct arc_buf *buf, dmu_tx_t *tx);
|
||||||
#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
|
#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
|
||||||
int dmu_convert_mdn_block_to_raw(objset_t *os, uint64_t firstobj,
|
|
||||||
boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
|
|
||||||
const uint8_t *mac, dmu_tx_t *tx);
|
|
||||||
void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
|
void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
dmu_buf_t *handle, dmu_tx_t *tx);
|
dmu_buf_t *handle, dmu_tx_t *tx);
|
||||||
#ifdef HAVE_UIO_ZEROCOPY
|
#ifdef HAVE_UIO_ZEROCOPY
|
||||||
|
@ -139,6 +139,7 @@ struct objset {
|
|||||||
uint64_t os_flags;
|
uint64_t os_flags;
|
||||||
uint64_t os_freed_dnodes;
|
uint64_t os_freed_dnodes;
|
||||||
boolean_t os_rescan_dnodes;
|
boolean_t os_rescan_dnodes;
|
||||||
|
boolean_t os_raw_receive;
|
||||||
|
|
||||||
/* os_phys_buf should be written raw next txg */
|
/* os_phys_buf should be written raw next txg */
|
||||||
boolean_t os_next_write_raw[TXG_SIZE];
|
boolean_t os_next_write_raw[TXG_SIZE];
|
||||||
|
@ -1538,7 +1538,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
|
|||||||
|
|
||||||
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
|
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
|
||||||
dr->dt.dl.dr_nopwrite = B_FALSE;
|
dr->dt.dl.dr_nopwrite = B_FALSE;
|
||||||
dr->dt.dl.dr_raw = B_FALSE;
|
dr->dt.dl.dr_has_raw_params = B_FALSE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Release the already-written buffer, so we leave it in
|
* Release the already-written buffer, so we leave it in
|
||||||
@ -2211,15 +2211,26 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* This function is effectively the same as dmu_buf_will_dirty(), but
|
* This function is effectively the same as dmu_buf_will_dirty(), but
|
||||||
* indicates the caller expects raw encrypted data in the db. It will
|
* indicates the caller expects raw encrypted data in the db, and provides
|
||||||
* also set the raw flag on the created dirty record.
|
* the crypt params (byteorder, salt, iv, mac) which should be stored in the
|
||||||
|
* blkptr_t when this dbuf is written. This is only used for blocks of
|
||||||
|
* dnodes, during raw receive.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
dmu_buf_will_change_crypt_params(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
|
||||||
|
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx)
|
||||||
{
|
{
|
||||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
||||||
dbuf_dirty_record_t *dr;
|
dbuf_dirty_record_t *dr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* dr_has_raw_params is only processed for blocks of dnodes
|
||||||
|
* (see dbuf_prepare_encrypted_dnode_leaf()).
|
||||||
|
*/
|
||||||
|
ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT);
|
||||||
|
ASSERT3U(db->db_level, ==, 0);
|
||||||
|
ASSERT(db->db_objset->os_raw_receive);
|
||||||
|
|
||||||
dmu_buf_will_dirty_impl(db_fake,
|
dmu_buf_will_dirty_impl(db_fake,
|
||||||
DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT, tx);
|
DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT, tx);
|
||||||
|
|
||||||
@ -2229,8 +2240,12 @@ dmu_buf_will_change_crypt_params(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
|||||||
|
|
||||||
ASSERT3P(dr, !=, NULL);
|
ASSERT3P(dr, !=, NULL);
|
||||||
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
|
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
|
||||||
dr->dt.dl.dr_raw = B_TRUE;
|
|
||||||
db->db_objset->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE;
|
dr->dt.dl.dr_has_raw_params = B_TRUE;
|
||||||
|
dr->dt.dl.dr_byteorder = byteorder;
|
||||||
|
bcopy(salt, dr->dt.dl.dr_salt, ZIO_DATA_SALT_LEN);
|
||||||
|
bcopy(iv, dr->dt.dl.dr_iv, ZIO_DATA_IV_LEN);
|
||||||
|
bcopy(mac, dr->dt.dl.dr_mac, ZIO_DATA_MAC_LEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma weak dmu_buf_fill_done = dbuf_fill_done
|
#pragma weak dmu_buf_fill_done = dbuf_fill_done
|
||||||
@ -2341,7 +2356,6 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
|
|||||||
ASSERT(db->db_buf != NULL);
|
ASSERT(db->db_buf != NULL);
|
||||||
if (dr != NULL && dr->dr_txg == tx->tx_txg) {
|
if (dr != NULL && dr->dr_txg == tx->tx_txg) {
|
||||||
ASSERT(dr->dt.dl.dr_data == db->db_buf);
|
ASSERT(dr->dt.dl.dr_data == db->db_buf);
|
||||||
IMPLY(arc_is_encrypted(buf), dr->dt.dl.dr_raw);
|
|
||||||
|
|
||||||
if (!arc_released(db->db_buf)) {
|
if (!arc_released(db->db_buf)) {
|
||||||
ASSERT(dr->dt.dl.dr_override_state ==
|
ASSERT(dr->dt.dl.dr_override_state ==
|
||||||
@ -3452,20 +3466,23 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Ensure the dbuf's data is untransformed if the associated dirty
|
* When syncing out a block of dnodes, adjust the block to deal with
|
||||||
* record requires it. This is used by dbuf_sync_leaf() to ensure
|
* encryption. Normally, we make sure the block is decrypted before writing
|
||||||
* that a dnode block is decrypted before we write new data to it.
|
* it. If we have crypt params, then we are writing a raw (encrypted) block,
|
||||||
* For raw writes we assert that the buffer is already encrypted.
|
* from a raw receive. In this case, set the ARC buf's crypt params so
|
||||||
|
* that the BP will be filled with the correct byteorder, salt, iv, and mac.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
dbuf_check_crypt(dbuf_dirty_record_t *dr)
|
dbuf_prepare_encrypted_dnode_leaf(dbuf_dirty_record_t *dr)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
dmu_buf_impl_t *db = dr->dr_dbuf;
|
dmu_buf_impl_t *db = dr->dr_dbuf;
|
||||||
|
|
||||||
ASSERT(MUTEX_HELD(&db->db_mtx));
|
ASSERT(MUTEX_HELD(&db->db_mtx));
|
||||||
|
ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT);
|
||||||
|
ASSERT3U(db->db_level, ==, 0);
|
||||||
|
|
||||||
if (!dr->dt.dl.dr_raw && arc_is_encrypted(db->db_buf)) {
|
if (!db->db_objset->os_raw_receive && arc_is_encrypted(db->db_buf)) {
|
||||||
zbookmark_phys_t zb;
|
zbookmark_phys_t zb;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -3481,12 +3498,12 @@ dbuf_check_crypt(dbuf_dirty_record_t *dr)
|
|||||||
&zb, B_TRUE);
|
&zb, B_TRUE);
|
||||||
if (err)
|
if (err)
|
||||||
panic("Invalid dnode block MAC");
|
panic("Invalid dnode block MAC");
|
||||||
} else if (dr->dt.dl.dr_raw) {
|
} else if (dr->dt.dl.dr_has_raw_params) {
|
||||||
/*
|
(void) arc_release(dr->dt.dl.dr_data, db);
|
||||||
* Writing raw encrypted data requires the db's arc buffer
|
arc_convert_to_raw(dr->dt.dl.dr_data,
|
||||||
* to be converted to raw by the caller.
|
dmu_objset_id(db->db_objset),
|
||||||
*/
|
dr->dt.dl.dr_byteorder, DMU_OT_DNODE,
|
||||||
ASSERT(arc_is_encrypted(db->db_buf));
|
dr->dt.dl.dr_salt, dr->dt.dl.dr_iv, dr->dt.dl.dr_mac);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3667,7 +3684,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
|||||||
* or decrypted, depending on what we are writing to it this txg.
|
* or decrypted, depending on what we are writing to it this txg.
|
||||||
*/
|
*/
|
||||||
if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT)
|
if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT)
|
||||||
dbuf_check_crypt(dr);
|
dbuf_prepare_encrypted_dnode_leaf(dr);
|
||||||
|
|
||||||
if (db->db_state != DB_NOFILL &&
|
if (db->db_state != DB_NOFILL &&
|
||||||
dn->dn_object != DMU_META_DNODE_OBJECT &&
|
dn->dn_object != DMU_META_DNODE_OBJECT &&
|
||||||
@ -4336,7 +4353,7 @@ EXPORT_SYMBOL(dbuf_free_range);
|
|||||||
EXPORT_SYMBOL(dbuf_new_size);
|
EXPORT_SYMBOL(dbuf_new_size);
|
||||||
EXPORT_SYMBOL(dbuf_release_bp);
|
EXPORT_SYMBOL(dbuf_release_bp);
|
||||||
EXPORT_SYMBOL(dbuf_dirty);
|
EXPORT_SYMBOL(dbuf_dirty);
|
||||||
EXPORT_SYMBOL(dmu_buf_will_change_crypt_params);
|
EXPORT_SYMBOL(dmu_buf_set_crypt_params);
|
||||||
EXPORT_SYMBOL(dmu_buf_will_dirty);
|
EXPORT_SYMBOL(dmu_buf_will_dirty);
|
||||||
EXPORT_SYMBOL(dmu_buf_will_not_fill);
|
EXPORT_SYMBOL(dmu_buf_will_not_fill);
|
||||||
EXPORT_SYMBOL(dmu_buf_will_fill);
|
EXPORT_SYMBOL(dmu_buf_will_fill);
|
||||||
|
119
module/zfs/dmu.c
119
module/zfs/dmu.c
@ -765,7 +765,7 @@ dmu_objset_zfs_unmounting(objset_t *os)
|
|||||||
|
|
||||||
static int
|
static int
|
||||||
dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
|
dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
|
||||||
uint64_t length, boolean_t raw)
|
uint64_t length)
|
||||||
{
|
{
|
||||||
uint64_t object_size;
|
uint64_t object_size;
|
||||||
int err;
|
int err;
|
||||||
@ -848,19 +848,6 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
|
|||||||
uint64_t, dmu_tx_get_txg(tx));
|
uint64_t, dmu_tx_get_txg(tx));
|
||||||
dnode_free_range(dn, chunk_begin, chunk_len, tx);
|
dnode_free_range(dn, chunk_begin, chunk_len, tx);
|
||||||
|
|
||||||
/* if this is a raw free, mark the dirty record as such */
|
|
||||||
if (raw) {
|
|
||||||
dbuf_dirty_record_t *dr = dn->dn_dbuf->db_last_dirty;
|
|
||||||
|
|
||||||
while (dr != NULL && dr->dr_txg > tx->tx_txg)
|
|
||||||
dr = dr->dr_next;
|
|
||||||
if (dr != NULL && dr->dr_txg == tx->tx_txg) {
|
|
||||||
dr->dt.dl.dr_raw = B_TRUE;
|
|
||||||
dn->dn_objset->os_next_write_raw
|
|
||||||
[tx->tx_txg & TXG_MASK] = B_TRUE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
|
|
||||||
length -= chunk_len;
|
length -= chunk_len;
|
||||||
@ -878,7 +865,7 @@ dmu_free_long_range(objset_t *os, uint64_t object,
|
|||||||
err = dnode_hold(os, object, FTAG, &dn);
|
err = dnode_hold(os, object, FTAG, &dn);
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
return (err);
|
return (err);
|
||||||
err = dmu_free_long_range_impl(os, dn, offset, length, B_FALSE);
|
err = dmu_free_long_range_impl(os, dn, offset, length);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* It is important to zero out the maxblkid when freeing the entire
|
* It is important to zero out the maxblkid when freeing the entire
|
||||||
@ -893,37 +880,8 @@ dmu_free_long_range(objset_t *os, uint64_t object,
|
|||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* This function is equivalent to dmu_free_long_range(), but also
|
|
||||||
* marks the new dirty record as a raw write.
|
|
||||||
*/
|
|
||||||
int
|
int
|
||||||
dmu_free_long_range_raw(objset_t *os, uint64_t object,
|
dmu_free_long_object(objset_t *os, uint64_t object)
|
||||||
uint64_t offset, uint64_t length)
|
|
||||||
{
|
|
||||||
dnode_t *dn;
|
|
||||||
int err;
|
|
||||||
|
|
||||||
err = dnode_hold(os, object, FTAG, &dn);
|
|
||||||
if (err != 0)
|
|
||||||
return (err);
|
|
||||||
err = dmu_free_long_range_impl(os, dn, offset, length, B_TRUE);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* It is important to zero out the maxblkid when freeing the entire
|
|
||||||
* file, so that (a) subsequent calls to dmu_free_long_range_impl()
|
|
||||||
* will take the fast path, and (b) dnode_reallocate() can verify
|
|
||||||
* that the entire file has been freed.
|
|
||||||
*/
|
|
||||||
if (err == 0 && offset == 0 && length == DMU_OBJECT_END)
|
|
||||||
dn->dn_maxblkid = 0;
|
|
||||||
|
|
||||||
dnode_rele(dn, FTAG);
|
|
||||||
return (err);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
|
||||||
dmu_free_long_object_impl(objset_t *os, uint64_t object, boolean_t raw)
|
|
||||||
{
|
{
|
||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
int err;
|
int err;
|
||||||
@ -938,8 +896,6 @@ dmu_free_long_object_impl(objset_t *os, uint64_t object, boolean_t raw)
|
|||||||
dmu_tx_mark_netfree(tx);
|
dmu_tx_mark_netfree(tx);
|
||||||
err = dmu_tx_assign(tx, TXG_WAIT);
|
err = dmu_tx_assign(tx, TXG_WAIT);
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
if (raw)
|
|
||||||
err = dmu_object_dirty_raw(os, object, tx);
|
|
||||||
if (err == 0)
|
if (err == 0)
|
||||||
err = dmu_object_free(os, object, tx);
|
err = dmu_object_free(os, object, tx);
|
||||||
|
|
||||||
@ -951,19 +907,6 @@ dmu_free_long_object_impl(objset_t *os, uint64_t object, boolean_t raw)
|
|||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
|
||||||
dmu_free_long_object(objset_t *os, uint64_t object)
|
|
||||||
{
|
|
||||||
return (dmu_free_long_object_impl(os, object, B_FALSE));
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
dmu_free_long_object_raw(objset_t *os, uint64_t object)
|
|
||||||
{
|
|
||||||
return (dmu_free_long_object_impl(os, object, B_TRUE));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
|
dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
uint64_t size, dmu_tx_t *tx)
|
uint64_t size, dmu_tx_t *tx)
|
||||||
@ -1666,41 +1609,6 @@ dmu_return_arcbuf(arc_buf_t *buf)
|
|||||||
arc_buf_destroy(buf, FTAG);
|
arc_buf_destroy(buf, FTAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
|
||||||
dmu_convert_mdn_block_to_raw(objset_t *os, uint64_t firstobj,
|
|
||||||
boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
|
|
||||||
const uint8_t *mac, dmu_tx_t *tx)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
dmu_buf_t *handle = NULL;
|
|
||||||
dmu_buf_impl_t *db = NULL;
|
|
||||||
uint64_t offset = firstobj * DNODE_MIN_SIZE;
|
|
||||||
uint64_t dsobj = dmu_objset_id(os);
|
|
||||||
|
|
||||||
ret = dmu_buf_hold_by_dnode(DMU_META_DNODE(os), offset, FTAG, &handle,
|
|
||||||
DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT);
|
|
||||||
if (ret != 0)
|
|
||||||
return (ret);
|
|
||||||
|
|
||||||
dmu_buf_will_change_crypt_params(handle, tx);
|
|
||||||
|
|
||||||
db = (dmu_buf_impl_t *)handle;
|
|
||||||
ASSERT3P(db->db_buf, !=, NULL);
|
|
||||||
ASSERT3U(dsobj, !=, 0);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This technically violates the assumption the dmu code makes
|
|
||||||
* that dnode blocks are only released in syncing context.
|
|
||||||
*/
|
|
||||||
(void) arc_release(db->db_buf, db);
|
|
||||||
arc_convert_to_raw(db->db_buf, dsobj, byteorder, DMU_OT_DNODE,
|
|
||||||
salt, iv, mac);
|
|
||||||
|
|
||||||
dmu_buf_rele(handle, FTAG);
|
|
||||||
|
|
||||||
return (0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
|
dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
dmu_buf_t *handle, dmu_tx_t *tx)
|
dmu_buf_t *handle, dmu_tx_t *tx)
|
||||||
@ -2222,25 +2130,6 @@ dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
|
|||||||
dnode_rele(dn, FTAG);
|
dnode_rele(dn, FTAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Dirty an object and set the dirty record's raw flag. This is used
|
|
||||||
* when writing raw data to an object that will not effect the
|
|
||||||
* encryption parameters, specifically during raw receives.
|
|
||||||
*/
|
|
||||||
int
|
|
||||||
dmu_object_dirty_raw(objset_t *os, uint64_t object, dmu_tx_t *tx)
|
|
||||||
{
|
|
||||||
dnode_t *dn;
|
|
||||||
int err;
|
|
||||||
|
|
||||||
err = dnode_hold(os, object, FTAG, &dn);
|
|
||||||
if (err)
|
|
||||||
return (err);
|
|
||||||
dmu_buf_will_change_crypt_params((dmu_buf_t *)dn->dn_dbuf, tx);
|
|
||||||
dnode_rele(dn, FTAG);
|
|
||||||
return (err);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When the "redundant_metadata" property is set to "most", only indirect
|
* When the "redundant_metadata" property is set to "most", only indirect
|
||||||
* blocks of this level and higher will have an additional ditto block.
|
* blocks of this level and higher will have an additional ditto block.
|
||||||
@ -2623,9 +2512,7 @@ EXPORT_SYMBOL(dmu_buf_rele_array);
|
|||||||
EXPORT_SYMBOL(dmu_prefetch);
|
EXPORT_SYMBOL(dmu_prefetch);
|
||||||
EXPORT_SYMBOL(dmu_free_range);
|
EXPORT_SYMBOL(dmu_free_range);
|
||||||
EXPORT_SYMBOL(dmu_free_long_range);
|
EXPORT_SYMBOL(dmu_free_long_range);
|
||||||
EXPORT_SYMBOL(dmu_free_long_range_raw);
|
|
||||||
EXPORT_SYMBOL(dmu_free_long_object);
|
EXPORT_SYMBOL(dmu_free_long_object);
|
||||||
EXPORT_SYMBOL(dmu_free_long_object_raw);
|
|
||||||
EXPORT_SYMBOL(dmu_read);
|
EXPORT_SYMBOL(dmu_read);
|
||||||
EXPORT_SYMBOL(dmu_read_by_dnode);
|
EXPORT_SYMBOL(dmu_read_by_dnode);
|
||||||
EXPORT_SYMBOL(dmu_write);
|
EXPORT_SYMBOL(dmu_write);
|
||||||
|
@ -1663,11 +1663,12 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
|
|||||||
dmu_write_policy(os, NULL, 0, 0, &zp);
|
dmu_write_policy(os, NULL, 0, 0, &zp);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we are either claiming the ZIL or doing a raw receive write out
|
* If we are either claiming the ZIL or doing a raw receive, write
|
||||||
* the os_phys_buf raw. Neither of these actions will effect the MAC
|
* out the os_phys_buf raw. Neither of these actions will affect the
|
||||||
* at this point.
|
* MAC at this point.
|
||||||
*/
|
*/
|
||||||
if (os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
|
if (os->os_raw_receive ||
|
||||||
|
os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
|
||||||
ASSERT(os->os_encrypted);
|
ASSERT(os->os_encrypted);
|
||||||
os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
|
os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
|
||||||
arc_convert_to_raw(os->os_phys_buf,
|
arc_convert_to_raw(os->os_phys_buf,
|
||||||
|
@ -2174,6 +2174,7 @@ struct receive_writer_arg {
|
|||||||
uint64_t bytes_read; /* bytes read when current record created */
|
uint64_t bytes_read; /* bytes read when current record created */
|
||||||
|
|
||||||
/* Encryption parameters for the last received DRR_OBJECT_RANGE */
|
/* Encryption parameters for the last received DRR_OBJECT_RANGE */
|
||||||
|
boolean_t or_crypt_params_present;
|
||||||
uint64_t or_firstobj;
|
uint64_t or_firstobj;
|
||||||
uint64_t or_numslots;
|
uint64_t or_numslots;
|
||||||
uint8_t or_salt[ZIO_DATA_SALT_LEN];
|
uint8_t or_salt[ZIO_DATA_SALT_LEN];
|
||||||
@ -2507,23 +2508,16 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
|
|||||||
if (rwa->raw && nblkptr != drro->drr_nblkptr)
|
if (rwa->raw && nblkptr != drro->drr_nblkptr)
|
||||||
return (SET_ERROR(EINVAL));
|
return (SET_ERROR(EINVAL));
|
||||||
|
|
||||||
if (rwa->raw &&
|
if (drro->drr_blksz != doi.doi_data_block_size ||
|
||||||
(drro->drr_blksz != doi.doi_data_block_size ||
|
|
||||||
nblkptr < doi.doi_nblkptr ||
|
nblkptr < doi.doi_nblkptr ||
|
||||||
indblksz != doi.doi_metadata_block_size ||
|
drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT ||
|
||||||
drro->drr_nlevels < doi.doi_indirection ||
|
(rwa->raw &&
|
||||||
drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT)) {
|
(indblksz != doi.doi_metadata_block_size ||
|
||||||
err = dmu_free_long_range_raw(rwa->os,
|
drro->drr_nlevels < doi.doi_indirection))) {
|
||||||
|
err = dmu_free_long_range(rwa->os,
|
||||||
drro->drr_object, 0, DMU_OBJECT_END);
|
drro->drr_object, 0, DMU_OBJECT_END);
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
return (SET_ERROR(EINVAL));
|
return (SET_ERROR(EINVAL));
|
||||||
} else if (drro->drr_blksz != doi.doi_data_block_size ||
|
|
||||||
nblkptr < doi.doi_nblkptr ||
|
|
||||||
drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
|
|
||||||
err = dmu_free_long_range(rwa->os, drro->drr_object,
|
|
||||||
0, DMU_OBJECT_END);
|
|
||||||
if (err != 0)
|
|
||||||
return (SET_ERROR(EINVAL));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2538,13 +2532,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
|
|||||||
*/
|
*/
|
||||||
if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
|
if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
|
||||||
drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
|
drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
|
||||||
if (rwa->raw) {
|
err = dmu_free_long_object(rwa->os, drro->drr_object);
|
||||||
err = dmu_free_long_object_raw(rwa->os,
|
|
||||||
drro->drr_object);
|
|
||||||
} else {
|
|
||||||
err = dmu_free_long_object(rwa->os,
|
|
||||||
drro->drr_object);
|
|
||||||
}
|
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
return (SET_ERROR(EINVAL));
|
return (SET_ERROR(EINVAL));
|
||||||
|
|
||||||
@ -2586,9 +2574,6 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
|
|||||||
else if (err != 0)
|
else if (err != 0)
|
||||||
return (err);
|
return (err);
|
||||||
|
|
||||||
if (rwa->raw)
|
|
||||||
err = dmu_free_long_object_raw(rwa->os, slot);
|
|
||||||
else
|
|
||||||
err = dmu_free_long_object(rwa->os, slot);
|
err = dmu_free_long_object(rwa->os, slot);
|
||||||
|
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
@ -2630,26 +2615,38 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
|
|||||||
return (SET_ERROR(EINVAL));
|
return (SET_ERROR(EINVAL));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rwa->raw) {
|
if (rwa->or_crypt_params_present) {
|
||||||
/*
|
/*
|
||||||
* Convert the buffer associated with this range of dnodes
|
* Set the crypt params for the buffer associated with this
|
||||||
* to a raw buffer. This ensures that it will be written out
|
* range of dnodes. This causes the blkptr_t to have the
|
||||||
* as a raw buffer when we fill in the dnode object. Since we
|
* same crypt params (byteorder, salt, iv, mac) as on the
|
||||||
* are committing this tx now, it is possible for the dnode
|
* sending side.
|
||||||
* block to end up on-disk with the incorrect MAC. Despite
|
*
|
||||||
* this, the dataset is marked as inconsistent so no other
|
* Since we are committing this tx now, it is possible for
|
||||||
* code paths (apart from scrubs) will attempt to read this
|
* the dnode block to end up on-disk with the incorrect MAC,
|
||||||
* data. Scrubs will not be effected by this either since
|
* if subsequent objects in this block are received in a
|
||||||
* scrubs only read raw data and do not attempt to check
|
* different txg. However, since the dataset is marked as
|
||||||
* the MAC.
|
* inconsistent, no code paths will do a non-raw read (or
|
||||||
|
* decrypt the block / verify the MAC). The receive code and
|
||||||
|
* scrub code can safely do raw reads and verify the
|
||||||
|
* checksum. They don't need to verify the MAC.
|
||||||
*/
|
*/
|
||||||
err = dmu_convert_mdn_block_to_raw(rwa->os, rwa->or_firstobj,
|
dmu_buf_t *db = NULL;
|
||||||
rwa->or_byteorder, rwa->or_salt, rwa->or_iv, rwa->or_mac,
|
uint64_t offset = rwa->or_firstobj * DNODE_MIN_SIZE;
|
||||||
tx);
|
|
||||||
|
err = dmu_buf_hold_by_dnode(DMU_META_DNODE(rwa->os),
|
||||||
|
offset, FTAG, &db, DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
return (SET_ERROR(EINVAL));
|
return (SET_ERROR(EINVAL));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dmu_buf_set_crypt_params(db, rwa->or_byteorder,
|
||||||
|
rwa->or_salt, rwa->or_iv, rwa->or_mac, tx);
|
||||||
|
|
||||||
|
dmu_buf_rele(db, FTAG);
|
||||||
|
|
||||||
|
rwa->or_crypt_params_present = B_FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
dmu_object_set_checksum(rwa->os, drro->drr_object,
|
dmu_object_set_checksum(rwa->os, drro->drr_object,
|
||||||
@ -2726,9 +2723,6 @@ receive_freeobjects(struct receive_writer_arg *rwa,
|
|||||||
else if (err != 0)
|
else if (err != 0)
|
||||||
return (err);
|
return (err);
|
||||||
|
|
||||||
if (rwa->raw)
|
|
||||||
err = dmu_free_long_object_raw(rwa->os, obj);
|
|
||||||
else
|
|
||||||
err = dmu_free_long_object(rwa->os, obj);
|
err = dmu_free_long_object(rwa->os, obj);
|
||||||
|
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
@ -2781,9 +2775,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
|
|||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rwa->raw)
|
|
||||||
VERIFY0(dmu_object_dirty_raw(rwa->os, drrw->drr_object, tx));
|
|
||||||
|
|
||||||
if (rwa->byteswap && !arc_is_encrypted(abuf) &&
|
if (rwa->byteswap && !arc_is_encrypted(abuf) &&
|
||||||
arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
|
arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
|
||||||
dmu_object_byteswap_t byteswap =
|
dmu_object_byteswap_t byteswap =
|
||||||
@ -2870,7 +2861,6 @@ receive_write_byref(struct receive_writer_arg *rwa,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (rwa->raw) {
|
if (rwa->raw) {
|
||||||
VERIFY0(dmu_object_dirty_raw(rwa->os, drrwbr->drr_object, tx));
|
|
||||||
dmu_copy_from_buf(rwa->os, drrwbr->drr_object,
|
dmu_copy_from_buf(rwa->os, drrwbr->drr_object,
|
||||||
drrwbr->drr_offset, dbp, tx);
|
drrwbr->drr_offset, dbp, tx);
|
||||||
} else {
|
} else {
|
||||||
@ -2971,13 +2961,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
|
|||||||
dmu_tx_abort(tx);
|
dmu_tx_abort(tx);
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rwa->raw) {
|
|
||||||
VERIFY0(dmu_object_dirty_raw(rwa->os, drrs->drr_object, tx));
|
|
||||||
dmu_buf_will_change_crypt_params(db_spill, tx);
|
|
||||||
} else {
|
|
||||||
dmu_buf_will_dirty(db_spill, tx);
|
dmu_buf_will_dirty(db_spill, tx);
|
||||||
}
|
|
||||||
|
|
||||||
if (db_spill->db_size < drrs->drr_length)
|
if (db_spill->db_size < drrs->drr_length)
|
||||||
VERIFY(0 == dbuf_spill_set_blksz(db_spill,
|
VERIFY(0 == dbuf_spill_set_blksz(db_spill,
|
||||||
@ -3016,13 +3000,8 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
|
|||||||
if (drrf->drr_object > rwa->max_object)
|
if (drrf->drr_object > rwa->max_object)
|
||||||
rwa->max_object = drrf->drr_object;
|
rwa->max_object = drrf->drr_object;
|
||||||
|
|
||||||
if (rwa->raw) {
|
|
||||||
err = dmu_free_long_range_raw(rwa->os, drrf->drr_object,
|
|
||||||
drrf->drr_offset, drrf->drr_length);
|
|
||||||
} else {
|
|
||||||
err = dmu_free_long_range(rwa->os, drrf->drr_object,
|
err = dmu_free_long_range(rwa->os, drrf->drr_object,
|
||||||
drrf->drr_offset, drrf->drr_length);
|
drrf->drr_offset, drrf->drr_length);
|
||||||
}
|
|
||||||
|
|
||||||
return (err);
|
return (err);
|
||||||
}
|
}
|
||||||
@ -3062,9 +3041,10 @@ receive_object_range(struct receive_writer_arg *rwa,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* The DRR_OBJECT_RANGE handling must be deferred to receive_object()
|
* The DRR_OBJECT_RANGE handling must be deferred to receive_object()
|
||||||
* so that the encryption parameters are set with each object that is
|
* so that the block of dnodes is not written out when it's empty,
|
||||||
* written into that block.
|
* and converted to a HOLE BP.
|
||||||
*/
|
*/
|
||||||
|
rwa->or_crypt_params_present = B_TRUE;
|
||||||
rwa->or_firstobj = drror->drr_firstobj;
|
rwa->or_firstobj = drror->drr_firstobj;
|
||||||
rwa->or_numslots = drror->drr_numslots;
|
rwa->or_numslots = drror->drr_numslots;
|
||||||
bcopy(drror->drr_salt, rwa->or_salt, ZIO_DATA_SALT_LEN);
|
bcopy(drror->drr_salt, rwa->or_salt, ZIO_DATA_SALT_LEN);
|
||||||
@ -3090,6 +3070,7 @@ dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
|
|||||||
* after we stopped receiving the dataset.
|
* after we stopped receiving the dataset.
|
||||||
*/
|
*/
|
||||||
txg_wait_synced(ds->ds_dir->dd_pool, 0);
|
txg_wait_synced(ds->ds_dir->dd_pool, 0);
|
||||||
|
ds->ds_objset->os_raw_receive = B_FALSE;
|
||||||
|
|
||||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||||
if (drc->drc_resumable && !BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) {
|
if (drc->drc_resumable && !BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) {
|
||||||
@ -3841,6 +3822,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
|
|||||||
rwa->byteswap = drc->drc_byteswap;
|
rwa->byteswap = drc->drc_byteswap;
|
||||||
rwa->resumable = drc->drc_resumable;
|
rwa->resumable = drc->drc_resumable;
|
||||||
rwa->raw = drc->drc_raw;
|
rwa->raw = drc->drc_raw;
|
||||||
|
rwa->os->os_raw_receive = drc->drc_raw;
|
||||||
|
|
||||||
(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
|
(void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
|
||||||
TS_RUN, minclsyspri);
|
TS_RUN, minclsyspri);
|
||||||
@ -3903,12 +3885,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
|
|||||||
int next_err = 0;
|
int next_err = 0;
|
||||||
|
|
||||||
while (next_err == 0) {
|
while (next_err == 0) {
|
||||||
if (drc->drc_raw) {
|
|
||||||
free_err = dmu_free_long_object_raw(rwa->os,
|
|
||||||
obj);
|
|
||||||
} else {
|
|
||||||
free_err = dmu_free_long_object(rwa->os, obj);
|
free_err = dmu_free_long_object(rwa->os, obj);
|
||||||
}
|
|
||||||
if (free_err != 0 && free_err != ENOENT)
|
if (free_err != 0 && free_err != ENOENT)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -4037,6 +4014,7 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
|
|||||||
|
|
||||||
spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
|
spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
|
||||||
tx, "snap=%s", drc->drc_tosnap);
|
tx, "snap=%s", drc->drc_tosnap);
|
||||||
|
drc->drc_ds->ds_objset->os_raw_receive = B_FALSE;
|
||||||
|
|
||||||
if (!drc->drc_newfs) {
|
if (!drc->drc_newfs) {
|
||||||
dsl_dataset_t *origin_head;
|
dsl_dataset_t *origin_head;
|
||||||
|
Loading…
Reference in New Issue
Block a user