mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-25 19:57:43 +03:00
OpenZFS 7004 - dmu_tx_hold_zap() does dnode_hold() 7x on same object
Using a benchmark which has 32 threads creating 2 million files in the same directory, on a machine with 16 CPU cores, I observed poor performance. I noticed that dmu_tx_hold_zap() was using about 30% of all CPU, and doing dnode_hold() 7 times on the same object (the ZAP object that is being held).

dmu_tx_hold_zap() keeps a hold on the dnode_t the entire time it is running, in dmu_tx_hold_t:txh_dnode, so it would be nice to use the dnode_t that we already have in hand, rather than repeatedly calling dnode_hold(). To do this, we need to pass the dnode_t down through all the intermediate calls that dmu_tx_hold_zap() makes, making these routines take the dnode_t* rather than an objset_t* and a uint64_t object number. In particular, the following routines will need to have analogous *_by_dnode() variants created:

- dmu_buf_hold_noread()
- dmu_buf_hold()
- zap_lookup()
- zap_lookup_norm()
- zap_count_write()
- zap_lockdir()

This can improve performance on the benchmark described above by 100%, from 30,000 file creations per second to 60,000. (This improvement is on top of that provided by working around the object allocation issue. Peak performance of ~90,000 creations per second was observed with 8 CPUs; adding CPUs past that decreased performance due to lock contention.) The CPU used by dmu_tx_hold_zap() was reduced by 88%, from 340 CPU-seconds to 40 CPU-seconds.

Sponsored by: Intel Corp.
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Ned Bass <bass6@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
OpenZFS-issue: https://www.illumos.org/issues/7004
OpenZFS-commit: https://github.com/openzfs/openzfs/pull/109
Closes #4641
Closes #4972
This commit is contained in:
committed by
Brian Behlendorf
parent
8bea981504
commit
2bce8049c3
+48
-3
@@ -536,6 +536,24 @@ zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx,
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
|
||||
{
|
||||
dmu_buf_t *db;
|
||||
int err;
|
||||
|
||||
err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
|
||||
if (err != 0) {
|
||||
return (err);
|
||||
}
|
||||
err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
|
||||
if (err != 0) {
|
||||
dmu_buf_rele(db, tag);
|
||||
}
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
|
||||
@@ -927,6 +945,33 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_lookup_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf)
|
||||
{
|
||||
return (zap_lookup_norm_by_dnode(dn, name, integer_size,
|
||||
num_integers, buf, MT_EXACT, NULL, 0, NULL));
|
||||
}
|
||||
|
||||
int
|
||||
zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *ncp)
|
||||
{
|
||||
zap_t *zap;
|
||||
int err;
|
||||
|
||||
err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
|
||||
FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_lookup_impl(zap, name, integer_size,
|
||||
num_integers, buf, mt, realname, rn_len, ncp);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints)
|
||||
@@ -1460,7 +1505,7 @@ zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
|
||||
}
|
||||
|
||||
int
|
||||
zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
zap_count_write_by_dnode(dnode_t *dn, const char *name, int add,
|
||||
uint64_t *towrite, uint64_t *tooverwrite)
|
||||
{
|
||||
zap_t *zap;
|
||||
@@ -1488,7 +1533,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
* At present we are just evaluating the possibility of this operation
|
||||
* and hence we do not want to trigger an upgrade.
|
||||
*/
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE,
|
||||
err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
|
||||
FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
@@ -1552,7 +1597,7 @@ EXPORT_SYMBOL(zap_lookup_uint64);
|
||||
EXPORT_SYMBOL(zap_contains);
|
||||
EXPORT_SYMBOL(zap_prefetch);
|
||||
EXPORT_SYMBOL(zap_prefetch_uint64);
|
||||
EXPORT_SYMBOL(zap_count_write);
|
||||
EXPORT_SYMBOL(zap_count_write_by_dnode);
|
||||
EXPORT_SYMBOL(zap_add);
|
||||
EXPORT_SYMBOL(zap_add_uint64);
|
||||
EXPORT_SYMBOL(zap_update);
|
||||
|
||||
Reference in New Issue
Block a user