mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 02:14:28 +03:00
Improve zfs receive performance with lightweight write
The performance of `zfs receive` can be bottlenecked on the CPU consumed by the `receive_writer` thread, especially when receiving streams with small compressed block sizes. Much of the CPU is spent creating and destroying dbufs and arc bufs, one for each `WRITE` record in the send stream. This commit introduces the concept of "lightweight writes", which allows `zfs receive` to write to the DMU by providing an ABD and instantiating only a new type of `dbuf_dirty_record_t`. The dbuf and arc buf for this "dirty leaf block" are not instantiated. Because there is no dbuf with the dirty data, this mechanism doesn't support reading from "lightweight-dirty" blocks (they would see the on-disk state rather than the dirty data). Since the dedup-receive code has been removed, `zfs receive` is write-only, so this works fine. Because there are no arc bufs for the received data, the received data is no longer cached in the ARC. Testing a receive of a stream with average compressed block size of 4KB, this commit improves performance by 50%, while also reducing CPU usage by 50% of a CPU. On a per-block basis, CPU consumed by receive_writer() and dbuf_evict() is now 1/7th (14%) of what it was. Baseline: 450MB/s, CPU in receive_writer() 40% + dbuf_evict() 35%. New: 670MB/s, CPU in receive_writer() 17% + dbuf_evict() 0%. The code is also restructured in a few ways: Added a `dr_dnode` field to the dbuf_dirty_record_t. This simplifies some existing code that no longer needs `DB_DNODE_ENTER()` and related routines. The new field is needed by the lightweight-type dirty record. To ensure that the `dr_dnode` field remains valid until the dirty record is freed, we have to ensure that `dnode_move()` doesn't relocate the dnode_t. To do this we keep a hold on the dnode until its zios have completed. This is already done by the user-accounting code (`userquota_updates_task()`); this commit extends that so that it always keeps the dnode hold until zio completion (see `dnode_rele_task()`). 
`dn_dirty_txg` was previously zeroed when the dnode was synced. This was not necessary, since its meaning can be "when was this dnode last dirtied". This change simplifies the new `dnode_rele_task()` code. Removed some dead code related to `DRR_WRITE_BYREF` (dedup receive). Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Paul Dagnelie <pcd@delphix.com> Reviewed-by: George Wilson <gwilson@delphix.com> Signed-off-by: Matthew Ahrens <mahrens@delphix.com> Closes #11105
This commit is contained in:
parent
7d4b365ce3
commit
ba67d82142
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
*/
|
||||
@ -130,6 +130,16 @@ typedef struct dbuf_dirty_record {
|
||||
/* list link for dbuf dirty records */
|
||||
list_node_t dr_dbuf_node;
|
||||
|
||||
/*
|
||||
* The dnode we are part of. Note that the dnode can not be moved or
|
||||
* evicted due to the hold that's added by dnode_setdirty() or
|
||||
* dmu_objset_sync_dnodes(), and released by dnode_rele_task() or
|
||||
* userquota_updates_task(). This hold is necessary for
|
||||
* dirty_lightweight_leaf-type dirty records, which don't have a hold
|
||||
* on a dbuf.
|
||||
*/
|
||||
dnode_t *dr_dnode;
|
||||
|
||||
/* pointer to parent dirty record */
|
||||
struct dbuf_dirty_record *dr_parent;
|
||||
|
||||
@ -171,6 +181,17 @@ typedef struct dbuf_dirty_record {
|
||||
uint8_t dr_iv[ZIO_DATA_IV_LEN];
|
||||
uint8_t dr_mac[ZIO_DATA_MAC_LEN];
|
||||
} dl;
|
||||
struct dirty_lightweight_leaf {
|
||||
/*
|
||||
* This dirty record refers to a leaf (level=0)
|
||||
* block, whose dbuf has not been instantiated for
|
||||
* performance reasons.
|
||||
*/
|
||||
uint64_t dr_blkid;
|
||||
abd_t *dr_abd;
|
||||
zio_prop_t dr_props;
|
||||
enum zio_flag dr_flags;
|
||||
} dll;
|
||||
} dt;
|
||||
} dbuf_dirty_record_t;
|
||||
|
||||
@ -349,11 +370,16 @@ void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
|
||||
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
|
||||
dmu_tx_t *tx);
|
||||
arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db);
|
||||
void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
|
||||
bp_embedded_type_t etype, enum zio_compress comp,
|
||||
int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx);
|
||||
|
||||
int dmu_lightweight_write_by_dnode(dnode_t *dn, uint64_t offset, abd_t *abd,
|
||||
const struct zio_prop *zp, enum zio_flag flags, dmu_tx_t *tx);
|
||||
|
||||
void dmu_buf_redact(dmu_buf_t *dbuf, dmu_tx_t *tx);
|
||||
void dbuf_destroy(dmu_buf_impl_t *db);
|
||||
|
||||
|
@ -242,7 +242,7 @@ objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
|
||||
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
|
||||
objset_t **osp);
|
||||
void dmu_objset_evict(objset_t *os);
|
||||
void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx);
|
||||
void dmu_objset_sync_done(objset_t *os, dmu_tx_t *tx);
|
||||
void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
|
||||
boolean_t dmu_objset_userused_enabled(objset_t *os);
|
||||
void dmu_objset_userspace_upgrade(objset_t *os);
|
||||
|
@ -21,7 +21,7 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
* Copyright (c) 2019, Klara Inc.
|
||||
@ -1973,6 +1973,74 @@ dbuf_redirty(dbuf_dirty_record_t *dr)
|
||||
}
|
||||
}
|
||||
|
||||
dbuf_dirty_record_t *
|
||||
dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx)
|
||||
{
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
IMPLY(dn->dn_objset->os_raw_receive, dn->dn_maxblkid >= blkid);
|
||||
dnode_new_blkid(dn, blkid, tx, B_TRUE, B_FALSE);
|
||||
ASSERT(dn->dn_maxblkid >= blkid);
|
||||
|
||||
dbuf_dirty_record_t *dr = kmem_zalloc(sizeof (*dr), KM_SLEEP);
|
||||
list_link_init(&dr->dr_dirty_node);
|
||||
list_link_init(&dr->dr_dbuf_node);
|
||||
dr->dr_dnode = dn;
|
||||
dr->dr_txg = tx->tx_txg;
|
||||
dr->dt.dll.dr_blkid = blkid;
|
||||
dr->dr_accounted = dn->dn_datablksz;
|
||||
|
||||
/*
|
||||
* There should not be any dbuf for the block that we're dirtying.
|
||||
* Otherwise the buffer contents could be inconsistent between the
|
||||
* dbuf and the lightweight dirty record.
|
||||
*/
|
||||
ASSERT3P(NULL, ==, dbuf_find(dn->dn_objset, dn->dn_object, 0, blkid));
|
||||
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
int txgoff = tx->tx_txg & TXG_MASK;
|
||||
if (dn->dn_free_ranges[txgoff] != NULL) {
|
||||
range_tree_clear(dn->dn_free_ranges[txgoff], blkid, 1);
|
||||
}
|
||||
|
||||
if (dn->dn_nlevels == 1) {
|
||||
ASSERT3U(blkid, <, dn->dn_nblkptr);
|
||||
list_insert_tail(&dn->dn_dirty_records[txgoff], dr);
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
dnode_setdirty(dn, tx);
|
||||
} else {
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
|
||||
dmu_buf_impl_t *parent_db = dbuf_hold_level(dn,
|
||||
1, blkid >> epbs, FTAG);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
if (parent_db == NULL) {
|
||||
kmem_free(dr, sizeof (*dr));
|
||||
return (NULL);
|
||||
}
|
||||
int err = dbuf_read(parent_db, NULL,
|
||||
(DB_RF_NOPREFETCH | DB_RF_CANFAIL));
|
||||
if (err != 0) {
|
||||
dbuf_rele(parent_db, FTAG);
|
||||
kmem_free(dr, sizeof (*dr));
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
dbuf_dirty_record_t *parent_dr = dbuf_dirty(parent_db, tx);
|
||||
dbuf_rele(parent_db, FTAG);
|
||||
mutex_enter(&parent_dr->dt.di.dr_mtx);
|
||||
ASSERT3U(parent_dr->dr_txg, ==, tx->tx_txg);
|
||||
list_insert_tail(&parent_dr->dt.di.dr_children, dr);
|
||||
mutex_exit(&parent_dr->dt.di.dr_mtx);
|
||||
dr->dr_parent = parent_dr;
|
||||
}
|
||||
|
||||
dmu_objset_willuse_space(dn->dn_objset, dr->dr_accounted, tx);
|
||||
|
||||
return (dr);
|
||||
}
|
||||
|
||||
dbuf_dirty_record_t *
|
||||
dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
{
|
||||
@ -2090,6 +2158,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
|
||||
list_link_init(&dr->dr_dirty_node);
|
||||
list_link_init(&dr->dr_dbuf_node);
|
||||
dr->dr_dnode = dn;
|
||||
if (db->db_level == 0) {
|
||||
void *data_old = db->db_buf;
|
||||
|
||||
@ -2255,7 +2324,7 @@ dbuf_undirty_bonus(dbuf_dirty_record_t *dr)
|
||||
dmu_buf_impl_t *db = dr->dr_dbuf;
|
||||
|
||||
if (dr->dt.dl.dr_data != db->db.db_data) {
|
||||
struct dnode *dn = DB_DNODE(db);
|
||||
struct dnode *dn = dr->dr_dnode;
|
||||
int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
|
||||
|
||||
kmem_free(dr->dt.dl.dr_data, max_bonuslen);
|
||||
@ -2280,9 +2349,7 @@ dbuf_undirty_bonus(dbuf_dirty_record_t *dr)
|
||||
static boolean_t
|
||||
dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
{
|
||||
dnode_t *dn;
|
||||
uint64_t txg = tx->tx_txg;
|
||||
dbuf_dirty_record_t *dr;
|
||||
|
||||
ASSERT(txg != 0);
|
||||
|
||||
@ -2302,13 +2369,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
/*
|
||||
* If this buffer is not dirty, we're done.
|
||||
*/
|
||||
dr = dbuf_find_dirty_eq(db, txg);
|
||||
dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, txg);
|
||||
if (dr == NULL)
|
||||
return (B_FALSE);
|
||||
ASSERT(dr->dr_dbuf == db);
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
dnode_t *dn = dr->dr_dnode;
|
||||
|
||||
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
|
||||
|
||||
@ -2336,7 +2402,6 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
}
|
||||
DB_DNODE_EXIT(db);
|
||||
|
||||
if (db->db_state != DB_NOFILL) {
|
||||
dbuf_unoverride(dr);
|
||||
@ -3835,15 +3900,13 @@ dbuf_sync_bonus(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
||||
|
||||
ASSERT0(db->db_level);
|
||||
ASSERT(MUTEX_HELD(&db->db_mtx));
|
||||
ASSERT(DB_DNODE_HELD(db));
|
||||
ASSERT(db->db_blkid == DMU_BONUS_BLKID);
|
||||
ASSERT(data != NULL);
|
||||
|
||||
dnode_t *dn = DB_DNODE(db);
|
||||
dnode_t *dn = dr->dr_dnode;
|
||||
ASSERT3U(DN_MAX_BONUS_LEN(dn->dn_phys), <=,
|
||||
DN_SLOTS_TO_BONUSLEN(dn->dn_phys->dn_extra_slots + 1));
|
||||
bcopy(data, DN_BONUS(dn->dn_phys), DN_MAX_BONUS_LEN(dn->dn_phys));
|
||||
DB_DNODE_EXIT(db);
|
||||
|
||||
dbuf_sync_leaf_verify_bonus_dnode(dr);
|
||||
|
||||
@ -3902,8 +3965,7 @@ noinline static void
|
||||
dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_impl_t *db = dr->dr_dbuf;
|
||||
dnode_t *dn;
|
||||
zio_t *zio;
|
||||
dnode_t *dn = dr->dr_dnode;
|
||||
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
|
||||
@ -3923,12 +3985,9 @@ dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
||||
ASSERT3U(db->db_state, ==, DB_CACHED);
|
||||
ASSERT(db->db_buf != NULL);
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
/* Indirect block size must match what the dnode thinks it is. */
|
||||
ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
|
||||
dbuf_check_blkptr(dn, db);
|
||||
DB_DNODE_EXIT(db);
|
||||
|
||||
/* Provide the pending dirty record to child dbufs */
|
||||
db->db_data_pending = dr;
|
||||
@ -3937,7 +3996,7 @@ dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
||||
|
||||
dbuf_write(dr, db->db_buf, tx);
|
||||
|
||||
zio = dr->dr_zio;
|
||||
zio_t *zio = dr->dr_zio;
|
||||
mutex_enter(&dr->dt.di.dr_mtx);
|
||||
dbuf_sync_list(&dr->dt.di.dr_children, db->db_level - 1, tx);
|
||||
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
|
||||
@ -3962,7 +4021,7 @@ static void
|
||||
dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr)
|
||||
{
|
||||
#ifdef ZFS_DEBUG
|
||||
dnode_t *dn = DB_DNODE(dr->dr_dbuf);
|
||||
dnode_t *dn = dr->dr_dnode;
|
||||
|
||||
/*
|
||||
* Encrypted bonus buffers can have data past their bonuslen.
|
||||
@ -3985,6 +4044,153 @@ dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr)
|
||||
#endif
|
||||
}
|
||||
|
||||
static blkptr_t *
|
||||
dbuf_lightweight_bp(dbuf_dirty_record_t *dr)
|
||||
{
|
||||
/* This must be a lightweight dirty record. */
|
||||
ASSERT3P(dr->dr_dbuf, ==, NULL);
|
||||
dnode_t *dn = dr->dr_dnode;
|
||||
|
||||
if (dn->dn_phys->dn_nlevels == 1) {
|
||||
VERIFY3U(dr->dt.dll.dr_blkid, <, dn->dn_phys->dn_nblkptr);
|
||||
return (&dn->dn_phys->dn_blkptr[dr->dt.dll.dr_blkid]);
|
||||
} else {
|
||||
dmu_buf_impl_t *parent_db = dr->dr_parent->dr_dbuf;
|
||||
int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
|
||||
VERIFY3U(parent_db->db_level, ==, 1);
|
||||
VERIFY3P(parent_db->db_dnode_handle->dnh_dnode, ==, dn);
|
||||
VERIFY3U(dr->dt.dll.dr_blkid >> epbs, ==, parent_db->db_blkid);
|
||||
blkptr_t *bp = parent_db->db.db_data;
|
||||
return (&bp[dr->dt.dll.dr_blkid & ((1 << epbs) - 1)]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dbuf_lightweight_ready(zio_t *zio)
|
||||
{
|
||||
dbuf_dirty_record_t *dr = zio->io_private;
|
||||
blkptr_t *bp = zio->io_bp;
|
||||
|
||||
if (zio->io_error != 0)
|
||||
return;
|
||||
|
||||
dnode_t *dn = dr->dr_dnode;
|
||||
|
||||
blkptr_t *bp_orig = dbuf_lightweight_bp(dr);
|
||||
spa_t *spa = dmu_objset_spa(dn->dn_objset);
|
||||
int64_t delta = bp_get_dsize_sync(spa, bp) -
|
||||
bp_get_dsize_sync(spa, bp_orig);
|
||||
dnode_diduse_space(dn, delta);
|
||||
|
||||
uint64_t blkid = dr->dt.dll.dr_blkid;
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
if (blkid > dn->dn_phys->dn_maxblkid) {
|
||||
ASSERT0(dn->dn_objset->os_raw_receive);
|
||||
dn->dn_phys->dn_maxblkid = blkid;
|
||||
}
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
if (!BP_IS_EMBEDDED(bp)) {
|
||||
uint64_t fill = BP_IS_HOLE(bp) ? 0 : 1;
|
||||
BP_SET_FILL(bp, fill);
|
||||
}
|
||||
|
||||
dmu_buf_impl_t *parent_db;
|
||||
EQUIV(dr->dr_parent == NULL, dn->dn_phys->dn_nlevels == 1);
|
||||
if (dr->dr_parent == NULL) {
|
||||
parent_db = dn->dn_dbuf;
|
||||
} else {
|
||||
parent_db = dr->dr_parent->dr_dbuf;
|
||||
}
|
||||
rw_enter(&parent_db->db_rwlock, RW_WRITER);
|
||||
*bp_orig = *bp;
|
||||
rw_exit(&parent_db->db_rwlock);
|
||||
}
|
||||
|
||||
static void
|
||||
dbuf_lightweight_physdone(zio_t *zio)
|
||||
{
|
||||
dbuf_dirty_record_t *dr = zio->io_private;
|
||||
dsl_pool_t *dp = spa_get_dsl(zio->io_spa);
|
||||
ASSERT3U(dr->dr_txg, ==, zio->io_txg);
|
||||
|
||||
/*
|
||||
* The callback will be called io_phys_children times. Retire one
|
||||
* portion of our dirty space each time we are called. Any rounding
|
||||
* error will be cleaned up by dbuf_lightweight_done().
|
||||
*/
|
||||
int delta = dr->dr_accounted / zio->io_phys_children;
|
||||
dsl_pool_undirty_space(dp, delta, zio->io_txg);
|
||||
}
|
||||
|
||||
static void
|
||||
dbuf_lightweight_done(zio_t *zio)
|
||||
{
|
||||
dbuf_dirty_record_t *dr = zio->io_private;
|
||||
|
||||
VERIFY0(zio->io_error);
|
||||
|
||||
objset_t *os = dr->dr_dnode->dn_objset;
|
||||
dmu_tx_t *tx = os->os_synctx;
|
||||
|
||||
if (zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)) {
|
||||
ASSERT(BP_EQUAL(zio->io_bp, &zio->io_bp_orig));
|
||||
} else {
|
||||
dsl_dataset_t *ds = os->os_dsl_dataset;
|
||||
(void) dsl_dataset_block_kill(ds, &zio->io_bp_orig, tx, B_TRUE);
|
||||
dsl_dataset_block_born(ds, zio->io_bp, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* See comment in dbuf_write_done().
|
||||
*/
|
||||
if (zio->io_phys_children == 0) {
|
||||
dsl_pool_undirty_space(dmu_objset_pool(os),
|
||||
dr->dr_accounted, zio->io_txg);
|
||||
} else {
|
||||
dsl_pool_undirty_space(dmu_objset_pool(os),
|
||||
dr->dr_accounted % zio->io_phys_children, zio->io_txg);
|
||||
}
|
||||
|
||||
abd_free(dr->dt.dll.dr_abd);
|
||||
kmem_free(dr, sizeof (*dr));
|
||||
}
|
||||
|
||||
noinline static void
|
||||
dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
||||
{
|
||||
dnode_t *dn = dr->dr_dnode;
|
||||
zio_t *pio;
|
||||
if (dn->dn_phys->dn_nlevels == 1) {
|
||||
pio = dn->dn_zio;
|
||||
} else {
|
||||
pio = dr->dr_parent->dr_zio;
|
||||
}
|
||||
|
||||
zbookmark_phys_t zb = {
|
||||
.zb_objset = dmu_objset_id(dn->dn_objset),
|
||||
.zb_object = dn->dn_object,
|
||||
.zb_level = 0,
|
||||
.zb_blkid = dr->dt.dll.dr_blkid,
|
||||
};
|
||||
|
||||
/*
|
||||
* See comment in dbuf_write(). This is so that zio->io_bp_orig
|
||||
* will have the old BP in dbuf_lightweight_done().
|
||||
*/
|
||||
dr->dr_bp_copy = *dbuf_lightweight_bp(dr);
|
||||
|
||||
dr->dr_zio = zio_write(pio, dmu_objset_spa(dn->dn_objset),
|
||||
dmu_tx_get_txg(tx), &dr->dr_bp_copy, dr->dt.dll.dr_abd,
|
||||
dn->dn_datablksz, abd_get_size(dr->dt.dll.dr_abd),
|
||||
&dr->dt.dll.dr_props, dbuf_lightweight_ready, NULL,
|
||||
dbuf_lightweight_physdone, dbuf_lightweight_done, dr,
|
||||
ZIO_PRIORITY_ASYNC_WRITE,
|
||||
ZIO_FLAG_MUSTSUCCEED | dr->dt.dll.dr_flags, &zb);
|
||||
|
||||
zio_nowait(dr->dr_zio);
|
||||
}
|
||||
|
||||
/*
|
||||
* dbuf_sync_leaf() is called recursively from dbuf_sync_list() so it is
|
||||
* critical that we not allow the compiler to inline this function in to
|
||||
@ -3995,7 +4201,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
||||
{
|
||||
arc_buf_t **datap = &dr->dt.dl.dr_data;
|
||||
dmu_buf_impl_t *db = dr->dr_dbuf;
|
||||
dnode_t *dn;
|
||||
dnode_t *dn = dr->dr_dnode;
|
||||
objset_t *os;
|
||||
uint64_t txg = tx->tx_txg;
|
||||
|
||||
@ -4019,9 +4225,6 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
||||
}
|
||||
DBUF_VERIFY(db);
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
|
||||
if (db->db_blkid == DMU_SPILL_BLKID) {
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
|
||||
@ -4111,16 +4314,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
||||
ASSERT(!list_link_active(&dr->dr_dirty_node));
|
||||
if (dn->dn_object == DMU_META_DNODE_OBJECT) {
|
||||
list_insert_tail(&dn->dn_dirty_records[txg & TXG_MASK], dr);
|
||||
DB_DNODE_EXIT(db);
|
||||
} else {
|
||||
/*
|
||||
* Although zio_nowait() does not "wait for an IO", it does
|
||||
* initiate the IO. If this is an empty write it seems plausible
|
||||
* that the IO could actually be completed before the nowait
|
||||
* returns. We need to DB_DNODE_EXIT() first in case
|
||||
* zio_nowait() invalidates the dbuf.
|
||||
*/
|
||||
DB_DNODE_EXIT(db);
|
||||
zio_nowait(dr->dr_zio);
|
||||
}
|
||||
}
|
||||
@ -4143,15 +4337,19 @@ dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx)
|
||||
DMU_META_DNODE_OBJECT);
|
||||
break;
|
||||
}
|
||||
if (dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
|
||||
dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
|
||||
VERIFY3U(dr->dr_dbuf->db_level, ==, level);
|
||||
}
|
||||
list_remove(list, dr);
|
||||
if (dr->dr_dbuf->db_level > 0)
|
||||
dbuf_sync_indirect(dr, tx);
|
||||
else
|
||||
dbuf_sync_leaf(dr, tx);
|
||||
if (dr->dr_dbuf == NULL) {
|
||||
dbuf_sync_lightweight(dr, tx);
|
||||
} else {
|
||||
if (dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
|
||||
dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
|
||||
VERIFY3U(dr->dr_dbuf->db_level, ==, level);
|
||||
}
|
||||
if (dr->dr_dbuf->db_level > 0)
|
||||
dbuf_sync_indirect(dr, tx);
|
||||
else
|
||||
dbuf_sync_leaf(dr, tx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -4331,7 +4529,6 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
||||
blkptr_t *bp = db->db_blkptr;
|
||||
objset_t *os = db->db_objset;
|
||||
dmu_tx_t *tx = os->os_synctx;
|
||||
dbuf_dirty_record_t *dr;
|
||||
|
||||
ASSERT0(zio->io_error);
|
||||
ASSERT(db->db_blkptr == bp);
|
||||
@ -4352,7 +4549,8 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
||||
|
||||
DBUF_VERIFY(db);
|
||||
|
||||
dr = db->db_data_pending;
|
||||
dbuf_dirty_record_t *dr = db->db_data_pending;
|
||||
dnode_t *dn = dr->dr_dnode;
|
||||
ASSERT(!list_link_active(&dr->dr_dirty_node));
|
||||
ASSERT(dr->dr_dbuf == db);
|
||||
ASSERT(list_next(&db->db_dirty_records, dr) == NULL);
|
||||
@ -4360,14 +4558,9 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
if (db->db_blkid == DMU_SPILL_BLKID) {
|
||||
dnode_t *dn;
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
|
||||
ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
|
||||
db->db_blkptr == DN_SPILL_BLKPTR(dn->dn_phys));
|
||||
DB_DNODE_EXIT(db);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -4379,10 +4572,6 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
||||
arc_buf_destroy(dr->dt.dl.dr_data, db);
|
||||
}
|
||||
} else {
|
||||
dnode_t *dn;
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
|
||||
ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
|
||||
if (!BP_IS_HOLE(db->db_blkptr)) {
|
||||
@ -4393,7 +4582,6 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
||||
ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
|
||||
db->db.db_size);
|
||||
}
|
||||
DB_DNODE_EXIT(db);
|
||||
mutex_destroy(&dr->dt.di.dr_mtx);
|
||||
list_destroy(&dr->dt.di.dr_children);
|
||||
}
|
||||
@ -4586,7 +4774,7 @@ static void
|
||||
dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_impl_t *db = dr->dr_dbuf;
|
||||
dnode_t *dn;
|
||||
dnode_t *dn = dr->dr_dnode;
|
||||
objset_t *os;
|
||||
dmu_buf_impl_t *parent = db->db_parent;
|
||||
uint64_t txg = tx->tx_txg;
|
||||
@ -4597,8 +4785,6 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
||||
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
os = dn->dn_objset;
|
||||
|
||||
if (db->db_state != DB_NOFILL) {
|
||||
@ -4654,7 +4840,6 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
||||
wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0;
|
||||
|
||||
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
|
||||
DB_DNODE_EXIT(db);
|
||||
|
||||
/*
|
||||
* We copy the blkptr now (rather than when we instantiate the dirty
|
||||
|
@ -1395,6 +1395,32 @@ dmu_return_arcbuf(arc_buf_t *buf)
|
||||
arc_buf_destroy(buf, FTAG);
|
||||
}
|
||||
|
||||
/*
|
||||
* A "lightweight" write is faster than a regular write (e.g.
|
||||
* dmu_write_by_dnode() or dmu_assign_arcbuf_by_dnode()), because it avoids the
|
||||
* CPU cost of creating a dmu_buf_impl_t and arc_buf_[hdr_]_t. However, the
|
||||
* data can not be read or overwritten until the transaction's txg has been
|
||||
* synced. This makes it appropriate for workloads that are known to be
|
||||
* (temporarily) write-only, like "zfs receive".
|
||||
*
|
||||
* A single block is written, starting at the specified offset in bytes. If
|
||||
* the call is successful, it returns 0 and the provided abd has been
|
||||
* consumed (the caller should not free it).
|
||||
*/
|
||||
int
|
||||
dmu_lightweight_write_by_dnode(dnode_t *dn, uint64_t offset, abd_t *abd,
|
||||
const zio_prop_t *zp, enum zio_flag flags, dmu_tx_t *tx)
|
||||
{
|
||||
dbuf_dirty_record_t *dr =
|
||||
dbuf_dirty_lightweight(dn, dbuf_whichblock(dn, 0, offset), tx);
|
||||
if (dr == NULL)
|
||||
return (SET_ERROR(EIO));
|
||||
dr->dt.dll.dr_abd = abd;
|
||||
dr->dt.dll.dr_props = *zp;
|
||||
dr->dt.dll.dr_flags = flags;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* When possible directly assign passed loaned arc buffer to a dbuf.
|
||||
* If this is not possible copy the contents of passed arc buf via
|
||||
@ -1418,8 +1444,8 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
/*
|
||||
* We can only assign if the offset is aligned, the arc buf is the
|
||||
* same size as the dbuf, and the dbuf is not metadata.
|
||||
* We can only assign if the offset is aligned and the arc buf is the
|
||||
* same size as the dbuf.
|
||||
*/
|
||||
if (offset == db->db.db_offset && blksz == db->db.db_size) {
|
||||
dbuf_assign_arcbuf(db, buf, tx);
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
@ -1235,7 +1235,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
|
||||
}
|
||||
VERIFY0(zio_wait(rzio));
|
||||
|
||||
dmu_objset_do_userquota_updates(os, tx);
|
||||
dmu_objset_sync_done(os, tx);
|
||||
taskq_wait(dp->dp_sync_taskq);
|
||||
if (txg_list_member(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
|
||||
ASSERT3P(ds->ds_key_mapping, !=, NULL);
|
||||
@ -1502,23 +1502,13 @@ dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx)
|
||||
multilist_sublist_remove(list, dn);
|
||||
|
||||
/*
|
||||
* If we are not doing useraccounting (os_synced_dnodes == NULL)
|
||||
* we are done with this dnode for this txg. Unset dn_dirty_txg
|
||||
* if later txgs aren't dirtying it so that future holders do
|
||||
* not get a stale value. Otherwise, we will do this in
|
||||
* userquota_updates_task() when processing has completely
|
||||
* finished for this txg.
|
||||
* See the comment above dnode_rele_task() for an explanation
|
||||
* of why this dnode hold is always needed (even when not
|
||||
* doing user accounting).
|
||||
*/
|
||||
multilist_t *newlist = dn->dn_objset->os_synced_dnodes;
|
||||
if (newlist != NULL) {
|
||||
(void) dnode_add_ref(dn, newlist);
|
||||
multilist_insert(newlist, dn);
|
||||
} else {
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
if (dn->dn_dirty_txg == tx->tx_txg)
|
||||
dn->dn_dirty_txg = 0;
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
}
|
||||
(void) dnode_add_ref(dn, newlist);
|
||||
multilist_insert(newlist, dn);
|
||||
|
||||
dnode_sync(dn, tx);
|
||||
}
|
||||
@ -1680,22 +1670,19 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
|
||||
|
||||
txgoff = tx->tx_txg & TXG_MASK;
|
||||
|
||||
if (dmu_objset_userused_enabled(os) &&
|
||||
(!os->os_encrypted || !dmu_objset_is_receiving(os))) {
|
||||
/*
|
||||
* We must create the list here because it uses the
|
||||
* dn_dirty_link[] of this txg. But it may already
|
||||
* exist because we call dsl_dataset_sync() twice per txg.
|
||||
*/
|
||||
if (os->os_synced_dnodes == NULL) {
|
||||
os->os_synced_dnodes =
|
||||
multilist_create(sizeof (dnode_t),
|
||||
offsetof(dnode_t, dn_dirty_link[txgoff]),
|
||||
dnode_multilist_index_func);
|
||||
} else {
|
||||
ASSERT3U(os->os_synced_dnodes->ml_offset, ==,
|
||||
offsetof(dnode_t, dn_dirty_link[txgoff]));
|
||||
}
|
||||
/*
|
||||
* We must create the list here because it uses the
|
||||
* dn_dirty_link[] of this txg. But it may already
|
||||
* exist because we call dsl_dataset_sync() twice per txg.
|
||||
*/
|
||||
if (os->os_synced_dnodes == NULL) {
|
||||
os->os_synced_dnodes =
|
||||
multilist_create(sizeof (dnode_t),
|
||||
offsetof(dnode_t, dn_dirty_link[txgoff]),
|
||||
dnode_multilist_index_func);
|
||||
} else {
|
||||
ASSERT3U(os->os_synced_dnodes->ml_offset, ==,
|
||||
offsetof(dnode_t, dn_dirty_link[txgoff]));
|
||||
}
|
||||
|
||||
ml = os->os_dirty_dnodes[txgoff];
|
||||
@ -2002,8 +1989,6 @@ userquota_updates_task(void *arg)
|
||||
dn->dn_id_flags |= DN_ID_CHKED_BONUS;
|
||||
}
|
||||
dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
|
||||
if (dn->dn_dirty_txg == spa_syncing_txg(os->os_spa))
|
||||
dn->dn_dirty_txg = 0;
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
multilist_sublist_remove(list, dn);
|
||||
@ -2014,13 +1999,44 @@ userquota_updates_task(void *arg)
|
||||
kmem_free(uua, sizeof (*uua));
|
||||
}
|
||||
|
||||
void
|
||||
dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
|
||||
/*
|
||||
* Release dnode holds from dmu_objset_sync_dnodes(). When the dnode is being
|
||||
* synced (i.e. we have issued the zio's for blocks in the dnode), it can't be
|
||||
* evicted because the block containing the dnode can't be evicted until it is
|
||||
* written out. However, this hold is necessary to prevent the dnode_t from
|
||||
* being moved (via dnode_move()) while it's still referenced by
|
||||
* dbuf_dirty_record_t:dr_dnode. And dr_dnode is needed for
|
||||
* dirty_lightweight_leaf-type dirty records.
|
||||
*
|
||||
* If we are doing user-object accounting, the dnode_rele() happens from
|
||||
* userquota_updates_task() instead.
|
||||
*/
|
||||
static void
|
||||
dnode_rele_task(void *arg)
|
||||
{
|
||||
int num_sublists;
|
||||
userquota_updates_arg_t *uua = arg;
|
||||
objset_t *os = uua->uua_os;
|
||||
|
||||
multilist_sublist_t *list =
|
||||
multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx);
|
||||
|
||||
dnode_t *dn;
|
||||
while ((dn = multilist_sublist_head(list)) != NULL) {
|
||||
multilist_sublist_remove(list, dn);
|
||||
dnode_rele(dn, os->os_synced_dnodes);
|
||||
}
|
||||
multilist_sublist_unlock(list);
|
||||
kmem_free(uua, sizeof (*uua));
|
||||
}
|
||||
|
||||
/*
|
||||
* Return TRUE if userquota updates are needed.
|
||||
*/
|
||||
static boolean_t
|
||||
dmu_objset_do_userquota_updates_prep(objset_t *os, dmu_tx_t *tx)
|
||||
{
|
||||
if (!dmu_objset_userused_enabled(os))
|
||||
return;
|
||||
return (B_FALSE);
|
||||
|
||||
/*
|
||||
* If this is a raw receive just return and handle accounting
|
||||
@ -2030,10 +2046,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
|
||||
* used for recovery.
|
||||
*/
|
||||
if (os->os_encrypted && dmu_objset_is_receiving(os))
|
||||
return;
|
||||
return (B_FALSE);
|
||||
|
||||
if (tx->tx_txg <= os->os_spa->spa_claim_max_txg)
|
||||
return;
|
||||
return (B_FALSE);
|
||||
|
||||
/* Allocate the user/group/project used objects if necessary. */
|
||||
if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
|
||||
@ -2050,23 +2066,39 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
|
||||
VERIFY0(zap_create_claim(os, DMU_PROJECTUSED_OBJECT,
|
||||
DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
|
||||
}
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
num_sublists = multilist_get_num_sublists(os->os_synced_dnodes);
|
||||
/*
|
||||
* Dispatch taskq tasks to dp_sync_taskq to update the user accounting, and
|
||||
* also release the holds on the dnodes from dmu_objset_sync_dnodes().
|
||||
* The caller must taskq_wait(dp_sync_taskq).
|
||||
*/
|
||||
void
|
||||
dmu_objset_sync_done(objset_t *os, dmu_tx_t *tx)
|
||||
{
|
||||
boolean_t need_userquota = dmu_objset_do_userquota_updates_prep(os, tx);
|
||||
|
||||
int num_sublists = multilist_get_num_sublists(os->os_synced_dnodes);
|
||||
for (int i = 0; i < num_sublists; i++) {
|
||||
if (multilist_sublist_is_empty_idx(os->os_synced_dnodes, i))
|
||||
continue;
|
||||
userquota_updates_arg_t *uua =
|
||||
kmem_alloc(sizeof (*uua), KM_SLEEP);
|
||||
uua->uua_os = os;
|
||||
uua->uua_sublist_idx = i;
|
||||
uua->uua_tx = tx;
|
||||
/* note: caller does taskq_wait() */
|
||||
|
||||
/*
|
||||
* If we don't need to update userquotas, use
|
||||
* dnode_rele_task() to call dnode_rele()
|
||||
*/
|
||||
(void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
|
||||
userquota_updates_task, uua, 0);
|
||||
need_userquota ? userquota_updates_task : dnode_rele_task,
|
||||
uua, 0);
|
||||
/* callback frees uua */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Returns a pointer to data to find uid/gid from
|
||||
*
|
||||
@ -2088,18 +2120,11 @@ dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
if (dr == NULL) {
|
||||
data = NULL;
|
||||
} else {
|
||||
dnode_t *dn;
|
||||
|
||||
DB_DNODE_ENTER(dr->dr_dbuf);
|
||||
dn = DB_DNODE(dr->dr_dbuf);
|
||||
|
||||
if (dn->dn_bonuslen == 0 &&
|
||||
if (dr->dr_dnode->dn_bonuslen == 0 &&
|
||||
dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
|
||||
data = dr->dt.dl.dr_data->b_data;
|
||||
else
|
||||
data = dr->dt.dl.dr_data;
|
||||
|
||||
DB_DNODE_EXIT(dr->dr_dbuf);
|
||||
}
|
||||
|
||||
return (data);
|
||||
@ -2990,7 +3015,7 @@ EXPORT_SYMBOL(dmu_objset_create_impl);
|
||||
EXPORT_SYMBOL(dmu_objset_open_impl);
|
||||
EXPORT_SYMBOL(dmu_objset_evict);
|
||||
EXPORT_SYMBOL(dmu_objset_register_type);
|
||||
EXPORT_SYMBOL(dmu_objset_do_userquota_updates);
|
||||
EXPORT_SYMBOL(dmu_objset_sync_done);
|
||||
EXPORT_SYMBOL(dmu_objset_userquota_get_ids);
|
||||
EXPORT_SYMBOL(dmu_objset_userused_enabled);
|
||||
EXPORT_SYMBOL(dmu_objset_userspace_upgrade);
|
||||
|
@ -79,10 +79,10 @@ struct receive_record_arg {
|
||||
dmu_replay_record_t header;
|
||||
void *payload; /* Pointer to a buffer containing the payload */
|
||||
/*
|
||||
* If the record is a write, pointer to the arc_buf_t containing the
|
||||
* If the record is a WRITE or SPILL, pointer to the abd containing the
|
||||
* payload.
|
||||
*/
|
||||
arc_buf_t *arc_buf;
|
||||
abd_t *abd;
|
||||
int payload_size;
|
||||
uint64_t bytes_read; /* bytes read from stream when record created */
|
||||
boolean_t eos_marker; /* Marks the end of the stream */
|
||||
@ -95,8 +95,8 @@ struct receive_writer_arg {
|
||||
bqueue_t q;
|
||||
|
||||
/*
|
||||
* These three args are used to signal to the main thread that we're
|
||||
* done.
|
||||
* These three members are used to signal to the main thread when
|
||||
* we're done.
|
||||
*/
|
||||
kmutex_t mutex;
|
||||
kcondvar_t cv;
|
||||
@ -175,18 +175,6 @@ byteswap_record(dmu_replay_record_t *drr)
|
||||
DO64(drr_write.drr_key.ddk_prop);
|
||||
DO64(drr_write.drr_compressed_size);
|
||||
break;
|
||||
case DRR_WRITE_BYREF:
|
||||
DO64(drr_write_byref.drr_object);
|
||||
DO64(drr_write_byref.drr_offset);
|
||||
DO64(drr_write_byref.drr_length);
|
||||
DO64(drr_write_byref.drr_toguid);
|
||||
DO64(drr_write_byref.drr_refguid);
|
||||
DO64(drr_write_byref.drr_refobject);
|
||||
DO64(drr_write_byref.drr_refoffset);
|
||||
ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write_byref.
|
||||
drr_key.ddk_cksum);
|
||||
DO64(drr_write_byref.drr_key.ddk_prop);
|
||||
break;
|
||||
case DRR_WRITE_EMBEDDED:
|
||||
DO64(drr_write_embedded.drr_object);
|
||||
DO64(drr_write_embedded.drr_offset);
|
||||
@ -1903,58 +1891,106 @@ flush_write_batch_impl(struct receive_writer_arg *rwa)
|
||||
struct receive_record_arg *rrd;
|
||||
while ((rrd = list_head(&rwa->write_batch)) != NULL) {
|
||||
struct drr_write *drrw = &rrd->header.drr_u.drr_write;
|
||||
arc_buf_t *abuf = rrd->arc_buf;
|
||||
abd_t *abd = rrd->abd;
|
||||
|
||||
ASSERT3U(drrw->drr_object, ==, rwa->last_object);
|
||||
|
||||
if (rwa->byteswap && !arc_is_encrypted(abuf) &&
|
||||
arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
|
||||
dmu_object_byteswap_t byteswap =
|
||||
DMU_OT_BYTESWAP(drrw->drr_type);
|
||||
dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
|
||||
DRR_WRITE_PAYLOAD_SIZE(drrw));
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are receiving an incremental large-block stream into
|
||||
* a dataset that previously did a non-large-block receive,
|
||||
* the WRITE record may be larger than the object's block
|
||||
* size. dmu_assign_arcbuf_by_dnode() handles this as long
|
||||
* as the arcbuf is not compressed, so decompress it here if
|
||||
* necessary.
|
||||
*/
|
||||
if (drrw->drr_logical_size != dn->dn_datablksz &&
|
||||
arc_get_compression(abuf) != ZIO_COMPRESS_OFF) {
|
||||
if (drrw->drr_logical_size != dn->dn_datablksz) {
|
||||
/*
|
||||
* The WRITE record is larger than the object's block
|
||||
* size. We must be receiving an incremental
|
||||
* large-block stream into a dataset that previously did
|
||||
* a non-large-block receive. Lightweight writes must
|
||||
* be exactly one block, so we need to decompress the
|
||||
* data (if compressed) and do a normal dmu_write().
|
||||
*/
|
||||
ASSERT3U(drrw->drr_logical_size, >, dn->dn_datablksz);
|
||||
zbookmark_phys_t zb = {
|
||||
.zb_objset = dmu_objset_id(rwa->os),
|
||||
.zb_object = rwa->last_object,
|
||||
.zb_level = 0,
|
||||
.zb_blkid =
|
||||
drrw->drr_offset >> dn->dn_datablkshift,
|
||||
};
|
||||
if (DRR_WRITE_COMPRESSED(drrw)) {
|
||||
abd_t *decomp_abd =
|
||||
abd_alloc_linear(drrw->drr_logical_size,
|
||||
B_FALSE);
|
||||
|
||||
err = zio_decompress_data(
|
||||
drrw->drr_compressiontype,
|
||||
abd, abd_to_buf(decomp_abd),
|
||||
abd_get_size(abd),
|
||||
abd_get_size(decomp_abd), NULL);
|
||||
|
||||
if (err == 0) {
|
||||
dmu_write_by_dnode(dn,
|
||||
drrw->drr_offset,
|
||||
drrw->drr_logical_size,
|
||||
abd_to_buf(decomp_abd), tx);
|
||||
}
|
||||
abd_free(decomp_abd);
|
||||
} else {
|
||||
dmu_write_by_dnode(dn,
|
||||
drrw->drr_offset,
|
||||
drrw->drr_logical_size,
|
||||
abd_to_buf(abd), tx);
|
||||
}
|
||||
if (err == 0)
|
||||
abd_free(abd);
|
||||
} else {
|
||||
zio_prop_t zp;
|
||||
dmu_write_policy(rwa->os, dn, 0, 0, &zp);
|
||||
|
||||
enum zio_flag zio_flags = 0;
|
||||
|
||||
if (rwa->raw) {
|
||||
zp.zp_encrypt = B_TRUE;
|
||||
zp.zp_compress = drrw->drr_compressiontype;
|
||||
zp.zp_byteorder = ZFS_HOST_BYTEORDER ^
|
||||
!!DRR_IS_RAW_BYTESWAPPED(drrw->drr_flags) ^
|
||||
rwa->byteswap;
|
||||
bcopy(drrw->drr_salt, zp.zp_salt,
|
||||
ZIO_DATA_SALT_LEN);
|
||||
bcopy(drrw->drr_iv, zp.zp_iv,
|
||||
ZIO_DATA_IV_LEN);
|
||||
bcopy(drrw->drr_mac, zp.zp_mac,
|
||||
ZIO_DATA_MAC_LEN);
|
||||
if (DMU_OT_IS_ENCRYPTED(zp.zp_type)) {
|
||||
zp.zp_nopwrite = B_FALSE;
|
||||
zp.zp_copies = MIN(zp.zp_copies,
|
||||
SPA_DVAS_PER_BP - 1);
|
||||
}
|
||||
zio_flags |= ZIO_FLAG_RAW;
|
||||
} else if (DRR_WRITE_COMPRESSED(drrw)) {
|
||||
ASSERT3U(drrw->drr_compressed_size, >, 0);
|
||||
ASSERT3U(drrw->drr_logical_size, >=,
|
||||
drrw->drr_compressed_size);
|
||||
zp.zp_compress = drrw->drr_compressiontype;
|
||||
zio_flags |= ZIO_FLAG_RAW_COMPRESS;
|
||||
} else if (rwa->byteswap) {
|
||||
/*
|
||||
* Note: compressed blocks never need to be
|
||||
* byteswapped, because WRITE records for
|
||||
* metadata blocks are never compressed. The
|
||||
* exception is raw streams, which are written
|
||||
* in the original byteorder, and the byteorder
|
||||
* bit is preserved in the BP by setting
|
||||
* zp_byteorder above.
|
||||
*/
|
||||
dmu_object_byteswap_t byteswap =
|
||||
DMU_OT_BYTESWAP(drrw->drr_type);
|
||||
dmu_ot_byteswap[byteswap].ob_func(
|
||||
abd_to_buf(abd),
|
||||
DRR_WRITE_PAYLOAD_SIZE(drrw));
|
||||
}
|
||||
|
||||
/*
|
||||
* The size of loaned arc bufs is counted in
|
||||
* arc_loaned_bytes. When we untransform
|
||||
* (decompress) the buf, its size increases. To
|
||||
* ensure that arc_loaned_bytes remains accurate, we
|
||||
* need to return (un-loan) the buf (with its
|
||||
* compressed size) and then re-loan it (with its
|
||||
* new, uncompressed size).
|
||||
* Since this data can't be read until the receive
|
||||
* completes, we can do a "lightweight" write for
|
||||
* improved performance.
|
||||
*/
|
||||
arc_return_buf(abuf, FTAG);
|
||||
VERIFY0(arc_untransform(abuf, dmu_objset_spa(rwa->os),
|
||||
&zb, B_FALSE));
|
||||
arc_loan_inuse_buf(abuf, FTAG);
|
||||
err = dmu_lightweight_write_by_dnode(dn,
|
||||
drrw->drr_offset, abd, &zp, zio_flags, tx);
|
||||
}
|
||||
|
||||
err = dmu_assign_arcbuf_by_dnode(dn,
|
||||
drrw->drr_offset, abuf, tx);
|
||||
if (err != 0) {
|
||||
/*
|
||||
* This rrd is left on the list, so the caller will
|
||||
* free it (and the arc_buf).
|
||||
* free it (and the abd).
|
||||
*/
|
||||
break;
|
||||
}
|
||||
@ -1987,7 +2023,7 @@ flush_write_batch(struct receive_writer_arg *rwa)
|
||||
if (err != 0) {
|
||||
struct receive_record_arg *rrd;
|
||||
while ((rrd = list_remove_head(&rwa->write_batch)) != NULL) {
|
||||
dmu_return_arcbuf(rrd->arc_buf);
|
||||
abd_free(rrd->abd);
|
||||
kmem_free(rrd, sizeof (*rrd));
|
||||
}
|
||||
}
|
||||
@ -2090,9 +2126,8 @@ receive_write_embedded(struct receive_writer_arg *rwa,
|
||||
|
||||
static int
|
||||
receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
|
||||
arc_buf_t *abuf)
|
||||
abd_t *abd)
|
||||
{
|
||||
dmu_tx_t *tx;
|
||||
dmu_buf_t *db, *db_spill;
|
||||
int err;
|
||||
|
||||
@ -2107,7 +2142,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
|
||||
* the DRR_FLAG_SPILL_BLOCK flag.
|
||||
*/
|
||||
if (rwa->spill && DRR_SPILL_IS_UNMODIFIED(drrs->drr_flags)) {
|
||||
dmu_return_arcbuf(abuf);
|
||||
abd_free(abd);
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -2131,7 +2166,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
|
||||
return (err);
|
||||
}
|
||||
|
||||
tx = dmu_tx_create(rwa->os);
|
||||
dmu_tx_t *tx = dmu_tx_create(rwa->os);
|
||||
|
||||
dmu_tx_hold_spill(tx, db->db_object);
|
||||
|
||||
@ -2150,18 +2185,35 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
|
||||
*/
|
||||
if (db_spill->db_size != drrs->drr_length) {
|
||||
dmu_buf_will_fill(db_spill, tx);
|
||||
VERIFY(0 == dbuf_spill_set_blksz(db_spill,
|
||||
VERIFY0(dbuf_spill_set_blksz(db_spill,
|
||||
drrs->drr_length, tx));
|
||||
}
|
||||
|
||||
if (rwa->byteswap && !arc_is_encrypted(abuf) &&
|
||||
arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
|
||||
dmu_object_byteswap_t byteswap =
|
||||
DMU_OT_BYTESWAP(drrs->drr_type);
|
||||
dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
|
||||
DRR_SPILL_PAYLOAD_SIZE(drrs));
|
||||
arc_buf_t *abuf;
|
||||
if (rwa->raw) {
|
||||
boolean_t byteorder = ZFS_HOST_BYTEORDER ^
|
||||
!!DRR_IS_RAW_BYTESWAPPED(drrs->drr_flags) ^
|
||||
rwa->byteswap;
|
||||
|
||||
abuf = arc_loan_raw_buf(dmu_objset_spa(rwa->os),
|
||||
drrs->drr_object, byteorder, drrs->drr_salt,
|
||||
drrs->drr_iv, drrs->drr_mac, drrs->drr_type,
|
||||
drrs->drr_compressed_size, drrs->drr_length,
|
||||
drrs->drr_compressiontype, 0);
|
||||
} else {
|
||||
abuf = arc_loan_buf(dmu_objset_spa(rwa->os),
|
||||
DMU_OT_IS_METADATA(drrs->drr_type),
|
||||
drrs->drr_length);
|
||||
if (rwa->byteswap) {
|
||||
dmu_object_byteswap_t byteswap =
|
||||
DMU_OT_BYTESWAP(drrs->drr_type);
|
||||
dmu_ot_byteswap[byteswap].ob_func(abd_to_buf(abd),
|
||||
DRR_SPILL_PAYLOAD_SIZE(drrs));
|
||||
}
|
||||
}
|
||||
|
||||
bcopy(abd_to_buf(abd), abuf->b_data, DRR_SPILL_PAYLOAD_SIZE(drrs));
|
||||
abd_free(abd);
|
||||
dbuf_assign_arcbuf((dmu_buf_impl_t *)db_spill, abuf, tx);
|
||||
|
||||
dmu_buf_rele(db, FTAG);
|
||||
@ -2451,53 +2503,19 @@ receive_read_record(dmu_recv_cookie_t *drc)
|
||||
case DRR_WRITE:
|
||||
{
|
||||
struct drr_write *drrw = &drc->drc_rrd->header.drr_u.drr_write;
|
||||
arc_buf_t *abuf;
|
||||
boolean_t is_meta = DMU_OT_IS_METADATA(drrw->drr_type);
|
||||
|
||||
if (drc->drc_raw) {
|
||||
boolean_t byteorder = ZFS_HOST_BYTEORDER ^
|
||||
!!DRR_IS_RAW_BYTESWAPPED(drrw->drr_flags) ^
|
||||
drc->drc_byteswap;
|
||||
|
||||
abuf = arc_loan_raw_buf(dmu_objset_spa(drc->drc_os),
|
||||
drrw->drr_object, byteorder, drrw->drr_salt,
|
||||
drrw->drr_iv, drrw->drr_mac, drrw->drr_type,
|
||||
drrw->drr_compressed_size, drrw->drr_logical_size,
|
||||
drrw->drr_compressiontype, 0);
|
||||
} else if (DRR_WRITE_COMPRESSED(drrw)) {
|
||||
ASSERT3U(drrw->drr_compressed_size, >, 0);
|
||||
ASSERT3U(drrw->drr_logical_size, >=,
|
||||
drrw->drr_compressed_size);
|
||||
ASSERT(!is_meta);
|
||||
abuf = arc_loan_compressed_buf(
|
||||
dmu_objset_spa(drc->drc_os),
|
||||
drrw->drr_compressed_size, drrw->drr_logical_size,
|
||||
drrw->drr_compressiontype, 0);
|
||||
} else {
|
||||
abuf = arc_loan_buf(dmu_objset_spa(drc->drc_os),
|
||||
is_meta, drrw->drr_logical_size);
|
||||
}
|
||||
|
||||
err = receive_read_payload_and_next_header(drc,
|
||||
DRR_WRITE_PAYLOAD_SIZE(drrw), abuf->b_data);
|
||||
int size = DRR_WRITE_PAYLOAD_SIZE(drrw);
|
||||
abd_t *abd = abd_alloc_linear(size, B_FALSE);
|
||||
err = receive_read_payload_and_next_header(drc, size,
|
||||
abd_to_buf(abd));
|
||||
if (err != 0) {
|
||||
dmu_return_arcbuf(abuf);
|
||||
abd_free(abd);
|
||||
return (err);
|
||||
}
|
||||
drc->drc_rrd->arc_buf = abuf;
|
||||
drc->drc_rrd->abd = abd;
|
||||
receive_read_prefetch(drc, drrw->drr_object, drrw->drr_offset,
|
||||
drrw->drr_logical_size);
|
||||
return (err);
|
||||
}
|
||||
case DRR_WRITE_BYREF:
|
||||
{
|
||||
struct drr_write_byref *drrwb =
|
||||
&drc->drc_rrd->header.drr_u.drr_write_byref;
|
||||
err = receive_read_payload_and_next_header(drc, 0, NULL);
|
||||
receive_read_prefetch(drc, drrwb->drr_object, drrwb->drr_offset,
|
||||
drrwb->drr_length);
|
||||
return (err);
|
||||
}
|
||||
case DRR_WRITE_EMBEDDED:
|
||||
{
|
||||
struct drr_write_embedded *drrwe =
|
||||
@ -2536,29 +2554,14 @@ receive_read_record(dmu_recv_cookie_t *drc)
|
||||
case DRR_SPILL:
|
||||
{
|
||||
struct drr_spill *drrs = &drc->drc_rrd->header.drr_u.drr_spill;
|
||||
arc_buf_t *abuf;
|
||||
/* DRR_SPILL records are either raw or uncompressed */
|
||||
if (drc->drc_raw) {
|
||||
boolean_t byteorder = ZFS_HOST_BYTEORDER ^
|
||||
!!DRR_IS_RAW_BYTESWAPPED(drrs->drr_flags) ^
|
||||
drc->drc_byteswap;
|
||||
|
||||
abuf = arc_loan_raw_buf(dmu_objset_spa(drc->drc_os),
|
||||
drrs->drr_object, byteorder, drrs->drr_salt,
|
||||
drrs->drr_iv, drrs->drr_mac, drrs->drr_type,
|
||||
drrs->drr_compressed_size, drrs->drr_length,
|
||||
drrs->drr_compressiontype, 0);
|
||||
} else {
|
||||
abuf = arc_loan_buf(dmu_objset_spa(drc->drc_os),
|
||||
DMU_OT_IS_METADATA(drrs->drr_type),
|
||||
drrs->drr_length);
|
||||
}
|
||||
err = receive_read_payload_and_next_header(drc,
|
||||
DRR_SPILL_PAYLOAD_SIZE(drrs), abuf->b_data);
|
||||
int size = DRR_SPILL_PAYLOAD_SIZE(drrs);
|
||||
abd_t *abd = abd_alloc_linear(size, B_FALSE);
|
||||
err = receive_read_payload_and_next_header(drc, size,
|
||||
abd_to_buf(abd));
|
||||
if (err != 0)
|
||||
dmu_return_arcbuf(abuf);
|
||||
abd_free(abd);
|
||||
else
|
||||
drc->drc_rrd->arc_buf = abuf;
|
||||
drc->drc_rrd->abd = abd;
|
||||
return (err);
|
||||
}
|
||||
case DRR_OBJECT_RANGE:
|
||||
@ -2687,9 +2690,9 @@ receive_process_record(struct receive_writer_arg *rwa,
|
||||
if (rrd->header.drr_type != DRR_WRITE) {
|
||||
err = flush_write_batch(rwa);
|
||||
if (err != 0) {
|
||||
if (rrd->arc_buf != NULL) {
|
||||
dmu_return_arcbuf(rrd->arc_buf);
|
||||
rrd->arc_buf = NULL;
|
||||
if (rrd->abd != NULL) {
|
||||
abd_free(rrd->abd);
|
||||
rrd->abd = NULL;
|
||||
rrd->payload = NULL;
|
||||
} else if (rrd->payload != NULL) {
|
||||
kmem_free(rrd->payload, rrd->payload_size);
|
||||
@ -2726,8 +2729,8 @@ receive_process_record(struct receive_writer_arg *rwa,
|
||||
* the rrd or abd.
|
||||
*/
|
||||
ASSERT(err != 0);
|
||||
dmu_return_arcbuf(rrd->arc_buf);
|
||||
rrd->arc_buf = NULL;
|
||||
abd_free(rrd->abd);
|
||||
rrd->abd = NULL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -2749,10 +2752,10 @@ receive_process_record(struct receive_writer_arg *rwa,
|
||||
case DRR_SPILL:
|
||||
{
|
||||
struct drr_spill *drrs = &rrd->header.drr_u.drr_spill;
|
||||
err = receive_spill(rwa, drrs, rrd->arc_buf);
|
||||
err = receive_spill(rwa, drrs, rrd->abd);
|
||||
if (err != 0)
|
||||
dmu_return_arcbuf(rrd->arc_buf);
|
||||
rrd->arc_buf = NULL;
|
||||
abd_free(rrd->abd);
|
||||
rrd->abd = NULL;
|
||||
rrd->payload = NULL;
|
||||
break;
|
||||
}
|
||||
@ -2800,9 +2803,9 @@ receive_writer_thread(void *arg)
|
||||
int err = 0;
|
||||
if (rwa->err == 0) {
|
||||
err = receive_process_record(rwa, rrd);
|
||||
} else if (rrd->arc_buf != NULL) {
|
||||
dmu_return_arcbuf(rrd->arc_buf);
|
||||
rrd->arc_buf = NULL;
|
||||
} else if (rrd->abd != NULL) {
|
||||
abd_free(rrd->abd);
|
||||
rrd->abd = NULL;
|
||||
rrd->payload = NULL;
|
||||
} else if (rrd->payload != NULL) {
|
||||
kmem_free(rrd->payload, rrd->payload_size);
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
*/
|
||||
|
||||
@ -609,7 +609,6 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
ASSERT0(dn->dn_maxblkid);
|
||||
ASSERT0(dn->dn_allocated_txg);
|
||||
ASSERT0(dn->dn_assigned_txg);
|
||||
ASSERT0(dn->dn_dirty_txg);
|
||||
ASSERT(zfs_refcount_is_zero(&dn->dn_tx_holds));
|
||||
ASSERT3U(zfs_refcount_count(&dn->dn_holds), <=, 1);
|
||||
ASSERT(avl_is_empty(&dn->dn_dbufs));
|
||||
@ -649,6 +648,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
|
||||
dn->dn_free_txg = 0;
|
||||
dn->dn_dirtyctx_firstset = NULL;
|
||||
dn->dn_dirty_txg = 0;
|
||||
|
||||
dn->dn_allocated_txg = tx->tx_txg;
|
||||
dn->dn_id_flags = 0;
|
||||
@ -1812,6 +1812,7 @@ dnode_set_nlevels_impl(dnode_t *dn, int new_nlevels, dmu_tx_t *tx)
|
||||
|
||||
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
|
||||
|
||||
ASSERT3U(new_nlevels, >, dn->dn_nlevels);
|
||||
dn->dn_nlevels = new_nlevels;
|
||||
|
||||
ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]);
|
||||
@ -1829,10 +1830,12 @@ dnode_set_nlevels_impl(dnode_t *dn, int new_nlevels, dmu_tx_t *tx)
|
||||
list = &dn->dn_dirty_records[txgoff];
|
||||
for (dr = list_head(list); dr; dr = dr_next) {
|
||||
dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
|
||||
if (dr->dr_dbuf->db_level != new_nlevels-1 &&
|
||||
|
||||
IMPLY(dr->dr_dbuf == NULL, old_nlevels == 1);
|
||||
if (dr->dr_dbuf == NULL ||
|
||||
(dr->dr_dbuf->db_level == old_nlevels - 1 &&
|
||||
dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
|
||||
dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
|
||||
ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
|
||||
dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID)) {
|
||||
list_remove(&dn->dn_dirty_records[txgoff], dr);
|
||||
list_insert_tail(&new->dt.di.dr_children, dr);
|
||||
dr->dr_parent = new;
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
* Copyright 2020 Oxide Computer Company
|
||||
*/
|
||||
@ -851,6 +851,8 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
/*
|
||||
* Although we have dropped our reference to the dnode, it
|
||||
* can't be evicted until it's written, and we haven't yet
|
||||
* initiated the IO for the dnode's dbuf.
|
||||
* initiated the IO for the dnode's dbuf. Additionally, the caller
|
||||
* has already added a reference to the dnode because it's on the
|
||||
* os_synced_dnodes list.
|
||||
*/
|
||||
}
|
||||
|
@ -2267,10 +2267,8 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
|
||||
|
||||
dsl_bookmark_sync_done(ds, tx);
|
||||
|
||||
if (os->os_synced_dnodes != NULL) {
|
||||
multilist_destroy(os->os_synced_dnodes);
|
||||
os->os_synced_dnodes = NULL;
|
||||
}
|
||||
multilist_destroy(os->os_synced_dnodes);
|
||||
os->os_synced_dnodes = NULL;
|
||||
|
||||
if (os->os_encrypted)
|
||||
os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2019 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
|
||||
@ -566,6 +566,11 @@ dsl_pool_sync_mos(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
zio_t *zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
|
||||
dmu_objset_sync(dp->dp_meta_objset, zio, tx);
|
||||
VERIFY0(zio_wait(zio));
|
||||
dmu_objset_sync_done(dp->dp_meta_objset, tx);
|
||||
taskq_wait(dp->dp_sync_taskq);
|
||||
multilist_destroy(dp->dp_meta_objset->os_synced_dnodes);
|
||||
dp->dp_meta_objset->os_synced_dnodes = NULL;
|
||||
|
||||
dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", "");
|
||||
spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
|
||||
}
|
||||
@ -677,7 +682,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
||||
*/
|
||||
for (ds = list_head(&synced_datasets); ds != NULL;
|
||||
ds = list_next(&synced_datasets, ds)) {
|
||||
dmu_objset_do_userquota_updates(ds->ds_objset, tx);
|
||||
dmu_objset_sync_done(ds->ds_objset, tx);
|
||||
}
|
||||
taskq_wait(dp->dp_sync_taskq);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user