mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-03-10 04:16:18 +03:00
The performance of `zfs receive` can be bottlenecked on the CPU consumed by the `receive_writer` thread, especially when receiving streams with small compressed block sizes. Much of the CPU is spent creating and destroying dbufs and arc bufs, one for each `WRITE` record in the send stream. This commit introduces the concept of "lightweight writes", which allows `zfs receive` to write to the DMU by providing an ABD and instantiating only a new type of `dbuf_dirty_record_t`. The dbuf and arc buf for this "dirty leaf block" are not instantiated. Because there is no dbuf with the dirty data, this mechanism doesn't support reading from "lightweight-dirty" blocks (such reads would see the on-disk state rather than the dirty data). Since the dedup-receive code has been removed, `zfs receive` is write-only, so this works fine. Because there are no arc bufs for the received data, the received data is no longer cached in the ARC. Testing a receive of a stream with average compressed block size of 4KB, this commit improves performance by 50%, while also reducing CPU usage by 50% of a CPU. On a per-block basis, CPU consumed by receive_writer() and dbuf_evict() is now 1/7th (14%) of what it was. Baseline: 450MB/s, CPU in receive_writer() 40% + dbuf_evict() 35%. New: 670MB/s, CPU in receive_writer() 17% + dbuf_evict() 0%. The code is also restructured in a few ways: Added a `dr_dnode` field to the `dbuf_dirty_record_t`. This simplifies some existing code that no longer needs `DB_DNODE_ENTER()` and related routines. The new field is needed by the lightweight-type dirty record. To ensure that the `dr_dnode` field remains valid until the dirty record is freed, we have to ensure that `dnode_move()` doesn't relocate the dnode_t. To do this we keep a hold on the dnode until its zios have completed. This is already done by the user-accounting code (`userquota_updates_task()`); this commit extends that so that it always keeps the dnode hold until zio completion (see `dnode_rele_task()`). 
`dn_dirty_txg` was previously zeroed when the dnode was synced. This was not necessary, since its meaning can be "when was this dnode last dirtied". This change simplifies the new `dnode_rele_task()` code. Removed some dead code related to `DRR_WRITE_BYREF` (dedup receive). Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Paul Dagnelie <pcd@delphix.com> Reviewed-by: George Wilson <gwilson@delphix.com> Signed-off-by: Matthew Ahrens <mahrens@delphix.com> Closes #11105 |
||
|---|---|---|
| .. | ||
| crypto | ||
| fm | ||
| fs | ||
| lua | ||
| sysevent | ||
| zstd | ||
| abd_impl.h | ||
| abd.h | ||
| aggsum.h | ||
| arc_impl.h | ||
| arc.h | ||
| avl_impl.h | ||
| avl.h | ||
| bitops.h | ||
| blkptr.h | ||
| bplist.h | ||
| bpobj.h | ||
| bptree.h | ||
| bqueue.h | ||
| btree.h | ||
| dataset_kstats.h | ||
| dbuf.h | ||
| ddt.h | ||
| dmu_impl.h | ||
| dmu_objset.h | ||
| dmu_recv.h | ||
| dmu_redact.h | ||
| dmu_send.h | ||
| dmu_traverse.h | ||
| dmu_tx.h | ||
| dmu_zfetch.h | ||
| dmu.h | ||
| dnode.h | ||
| dsl_bookmark.h | ||
| dsl_crypt.h | ||
| dsl_dataset.h | ||
| dsl_deadlist.h | ||
| dsl_deleg.h | ||
| dsl_destroy.h | ||
| dsl_dir.h | ||
| dsl_pool.h | ||
| dsl_prop.h | ||
| dsl_scan.h | ||
| dsl_synctask.h | ||
| dsl_userhold.h | ||
| edonr.h | ||
| efi_partition.h | ||
| frame.h | ||
| hkdf.h | ||
| Makefile.am | ||
| metaslab_impl.h | ||
| metaslab.h | ||
| mmp.h | ||
| mntent.h | ||
| mod.h | ||
| multilist.h | ||
| note.h | ||
| nvpair_impl.h | ||
| nvpair.h | ||
| objlist.h | ||
| pathname.h | ||
| qat.h | ||
| range_tree.h | ||
| rrwlock.h | ||
| sa_impl.h | ||
| sa.h | ||
| skein.h | ||
| spa_boot.h | ||
| spa_checkpoint.h | ||
| spa_checksum.h | ||
| spa_impl.h | ||
| spa_log_spacemap.h | ||
| spa.h | ||
| space_map.h | ||
| space_reftree.h | ||
| sysevent.h | ||
| txg_impl.h | ||
| txg.h | ||
| u8_textprep_data.h | ||
| u8_textprep.h | ||
| uberblock_impl.h | ||
| uberblock.h | ||
| uio_impl.h | ||
| unique.h | ||
| uuid.h | ||
| vdev_disk.h | ||
| vdev_draid.h | ||
| vdev_file.h | ||
| vdev_impl.h | ||
| vdev_indirect_births.h | ||
| vdev_indirect_mapping.h | ||
| vdev_initialize.h | ||
| vdev_raidz_impl.h | ||
| vdev_raidz.h | ||
| vdev_rebuild.h | ||
| vdev_removal.h | ||
| vdev_trim.h | ||
| vdev.h | ||
| xvattr.h | ||
| zap_impl.h | ||
| zap_leaf.h | ||
| zap.h | ||
| zcp_global.h | ||
| zcp_iter.h | ||
| zcp_prop.h | ||
| zcp_set.h | ||
| zcp.h | ||
| zfeature.h | ||
| zfs_acl.h | ||
| zfs_bootenv.h | ||
| zfs_context.h | ||
| zfs_debug.h | ||
| zfs_delay.h | ||
| zfs_file.h | ||
| zfs_fuid.h | ||
| zfs_ioctl_impl.h | ||
| zfs_ioctl.h | ||
| zfs_onexit.h | ||
| zfs_project.h | ||
| zfs_quota.h | ||
| zfs_ratelimit.h | ||
| zfs_refcount.h | ||
| zfs_rlock.h | ||
| zfs_sa.h | ||
| zfs_stat.h | ||
| zfs_sysfs.h | ||
| zfs_vfsops.h | ||
| zfs_vnops.h | ||
| zfs_znode.h | ||
| zil_impl.h | ||
| zil.h | ||
| zio_checksum.h | ||
| zio_compress.h | ||
| zio_crypt.h | ||
| zio_impl.h | ||
| zio_priority.h | ||
| zio.h | ||
| zrlock.h | ||
| zthr.h | ||
| zvol_impl.h | ||
| zvol.h | ||