mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 19:28:53 +03:00
OpenZFS 7004 - dmu_tx_hold_zap() does dnode_hold() 7x on same object
Using a benchmark which has 32 threads creating 2 million files in the same directory, on a machine with 16 CPU cores, I observed poor performance. I noticed that dmu_tx_hold_zap() was using about 30% of all CPU, and doing dnode_hold() 7 times on the same object (the ZAP object that is being held).

dmu_tx_hold_zap() keeps a hold on the dnode_t the entire time it is running, in dmu_tx_hold_t:txh_dnode, so it would be nice to use the dnode_t that we already have in hand, rather than repeatedly calling dnode_hold(). To do this, we need to pass the dnode_t down through all the intermediate calls that dmu_tx_hold_zap() makes, making these routines take the dnode_t* rather than an objset_t* and a uint64_t object number.

In particular, the following routines will need to have analogous *_by_dnode() variants created:
- dmu_buf_hold_noread()
- dmu_buf_hold()
- zap_lookup()
- zap_lookup_norm()
- zap_count_write()
- zap_lockdir()

This can improve performance on the benchmark described above by 100%, from 30,000 file creations per second to 60,000. (This improvement is on top of that provided by working around the object allocation issue. Peak performance of ~90,000 creations per second was observed with 8 CPUs; adding CPUs past that decreased performance due to lock contention.) The CPU used by dmu_tx_hold_zap() was reduced by 88%, from 340 CPU-seconds to 40 CPU-seconds.

Sponsored by: Intel Corp.
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Ned Bass <bass6@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
OpenZFS-issue: https://www.illumos.org/issues/7004
OpenZFS-commit: https://github.com/openzfs/openzfs/pull/109
Closes #4641
Closes #4972
This commit is contained in:
committed by
Brian Behlendorf
parent
8bea981504
commit
2bce8049c3
@@ -127,6 +127,26 @@ const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
|
||||
{ zfs_acl_byteswap, "acl" }
|
||||
};
|
||||
|
||||
int
|
||||
dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
|
||||
void *tag, dmu_buf_t **dbp)
|
||||
{
|
||||
uint64_t blkid;
|
||||
dmu_buf_impl_t *db;
|
||||
|
||||
blkid = dbuf_whichblock(dn, 0, offset);
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
db = dbuf_hold(dn, blkid, tag);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
if (db == NULL) {
|
||||
*dbp = NULL;
|
||||
return (SET_ERROR(EIO));
|
||||
}
|
||||
|
||||
*dbp = &db->db;
|
||||
return (0);
|
||||
}
|
||||
int
|
||||
dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *tag, dmu_buf_t **dbp)
|
||||
@@ -154,6 +174,29 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
|
||||
void *tag, dmu_buf_t **dbp, int flags)
|
||||
{
|
||||
int err;
|
||||
int db_flags = DB_RF_CANFAIL;
|
||||
|
||||
if (flags & DMU_READ_NO_PREFETCH)
|
||||
db_flags |= DB_RF_NOPREFETCH;
|
||||
|
||||
err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp);
|
||||
if (err == 0) {
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
|
||||
err = dbuf_read(db, NULL, db_flags);
|
||||
if (err != 0) {
|
||||
dbuf_rele(db, tag);
|
||||
*dbp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *tag, dmu_buf_t **dbp, int flags)
|
||||
|
||||
Reference in New Issue
Block a user