mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
OpenZFS 7004 - dmu_tx_hold_zap() does dnode_hold() 7x on same object
Using a benchmark which has 32 threads creating 2 million files in the same directory, on a machine with 16 CPU cores, I observed poor performance. I noticed that dmu_tx_hold_zap() was using about 30% of all CPU, and doing dnode_hold() 7 times on the same object (the ZAP object that is being held). dmu_tx_hold_zap() keeps a hold on the dnode_t the entire time it is running, in dmu_tx_hold_t:txh_dnode, so it would be nice to use the dnode_t that we already have in hand, rather than repeatedly calling dnode_hold(). To do this, we need to pass the dnode_t down through all the intermediate calls that dmu_tx_hold_zap() makes, making these routines take the dnode_t* rather than an objset_t* and a uint64_t object number. In particular, the following routines will need to have analogous *_by_dnode() variants created: dmu_buf_hold_noread() dmu_buf_hold() zap_lookup() zap_lookup_norm() zap_count_write() zap_lockdir() zap_count_write() This can improve performance on the benchmark described above by 100%, from 30,000 file creations per second to 60,000. (This improvement is on top of that provided by working around the object allocation issue. Peak performance of ~90,000 creations per second was observed with 8 CPUs; adding CPUs past that decreased performance due to lock contention.) The CPU used by dmu_tx_hold_zap() was reduced by 88%, from 340 CPU-seconds to 40 CPU-seconds. Sponsored by: Intel Corp. Signed-off-by: Matthew Ahrens <mahrens@delphix.com> Signed-off-by: Ned Bass <bass6@llnl.gov> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> OpenZFS-issue: https://www.illumos.org/issues/7004 OpenZFS-commit: https://github.com/openzfs/openzfs/pull/109 Closes #4641 Closes #4972
This commit is contained in:
committed by
Brian Behlendorf
parent
8bea981504
commit
2bce8049c3
+9
-4
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
* Copyright 2014 HybridCluster. All rights reserved.
|
||||
@@ -73,6 +73,7 @@ struct sa_handle;
|
||||
typedef struct objset objset_t;
|
||||
typedef struct dmu_tx dmu_tx_t;
|
||||
typedef struct dsl_dir dsl_dir_t;
|
||||
typedef struct dnode dnode_t;
|
||||
|
||||
typedef enum dmu_object_byteswap {
|
||||
DMU_BSWAP_UINT8,
|
||||
@@ -420,7 +421,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
|
||||
#define WP_DMU_SYNC 0x2
|
||||
#define WP_SPILL 0x4
|
||||
|
||||
void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp,
|
||||
void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
|
||||
struct zio_prop *zp);
|
||||
/*
|
||||
* The bonus data is accessed more or less like a regular buffer.
|
||||
@@ -446,7 +447,7 @@ int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
|
||||
*/
|
||||
|
||||
int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
|
||||
int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags,
|
||||
int dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags,
|
||||
void *tag, dmu_buf_t **dbp);
|
||||
int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
|
||||
|
||||
@@ -466,6 +467,8 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
|
||||
*/
|
||||
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *tag, dmu_buf_t **, int flags);
|
||||
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
|
||||
void *tag, dmu_buf_t **dbp, int flags);
|
||||
|
||||
/*
|
||||
* Add a reference to a dmu buffer that has already been held via
|
||||
@@ -620,6 +623,8 @@ void *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);
|
||||
void *dmu_buf_get_user(dmu_buf_t *db);
|
||||
|
||||
objset_t *dmu_buf_get_objset(dmu_buf_t *db);
|
||||
dnode_t *dmu_buf_dnode_enter(dmu_buf_t *db);
|
||||
void dmu_buf_dnode_exit(dmu_buf_t *db);
|
||||
|
||||
/* Block until any in-progress dmu buf user evictions complete. */
|
||||
void dmu_buf_user_evict_wait(void);
|
||||
@@ -799,7 +804,7 @@ extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
|
||||
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
|
||||
void __dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
|
||||
/* Like dmu_object_info, but faster if you have a held dnode in hand. */
|
||||
void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
|
||||
void dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi);
|
||||
/* Like dmu_object_info, but faster if you have a held dbuf in hand. */
|
||||
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
|
||||
/*
|
||||
|
||||
+3
-3
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
*/
|
||||
|
||||
@@ -187,7 +187,7 @@ typedef struct dnode_phys {
|
||||
#define DN_SPILL_BLKPTR(dnp) (blkptr_t *)((char *)(dnp) + \
|
||||
(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))
|
||||
|
||||
typedef struct dnode {
|
||||
struct dnode {
|
||||
/*
|
||||
* Protects the structure of the dnode, including the number of levels
|
||||
* of indirection (dn_nlevels), dn_maxblkid, and dn_next_*
|
||||
@@ -286,7 +286,7 @@ typedef struct dnode {
|
||||
|
||||
/* holds prefetch structure */
|
||||
struct zfetch dn_zfetch;
|
||||
} dnode_t;
|
||||
};
|
||||
|
||||
/*
|
||||
* Adds a level of indirection between the dbuf and the dnode to avoid
|
||||
|
||||
+8
-1
@@ -236,7 +236,14 @@ int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
|
||||
int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints);
|
||||
|
||||
int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
|
||||
int zap_lookup_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *ncp);
|
||||
|
||||
int zap_count_write_by_dnode(dnode_t *dn, const char *name,
|
||||
int add, uint64_t *towrite, uint64_t *tooverwrite);
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user