Illumos 5008 - lock contention (rrw_exit) while running a read only load

5008 lock contention (rrw_exit) while running a read only load
Reviewed by: Matthew Ahrens <matthew.ahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Alex Reece <alex.reece@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Richard Yao <ryao@gentoo.org>
Reviewed by: Saso Kiselkov <skiselkov.ml@gmail.com>
Approved by: Garrett D'Amore <garrett@damore.org>

Porting notes:

This patch ported perfectly cleanly to ZoL.  During testing of 100% cached
small-block reads, extreme contention was noticed on rrl->rr_lock from
rrw_exit() due to the frequent entering and leaving of the ZPL.  Illumos
picked up this patch from FreeBSD and it also helps under Linux.

On a 1-minute 4K cached read test with 10 fio processes pinned to a single
socket on a 4-socket (10 threads per socket) NUMA system, contentions on
rrl->rr_lock were reduced from 508799 to 43085.

Ported-by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3555
This commit is contained in:
Alexander Motin
2014-07-18 08:53:38 -08:00
committed by Brian Behlendorf
parent 4bda3bd0e7
commit e16b3fcc61
6 changed files with 126 additions and 13 deletions
+25
View File
@@ -83,6 +83,31 @@ void rrw_tsd_destroy(void *arg);
/* Nonzero when rrw_held() reports x as held as either writer or reader. */
#define RRW_LOCK_HELD(x) \
(rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER))
/*
* A reader-mostly lock implementation, tuning above reader-writer locks
* for highly parallel read acquisitions, pessimizing write acquisitions.
*
* RRM_NUM_LOCKS is the number of rrwlock_t instances embedded in each
* rrmlock_t. It should be a prime number. See comment in rrwlock.c near
* RRM_TD_LOCK() for details.
*/
#define RRM_NUM_LOCKS 17
typedef struct rrmlock {
/* the RRM_NUM_LOCKS underlying reader-writer locks */
rrwlock_t locks[RRM_NUM_LOCKS];
} rrmlock_t;
/*
* rrmlock_t interface. Only prototypes appear here; the implementations
* are not visible in this header (presumably in rrwlock.c -- confirm).
* NOTE(review): the enter/exit "tag" argument is passed as FTAG by the
* ZFS_ENTER/ZFS_EXIT callers; its exact use is defined by the
* implementation.
*/
void rrm_init(rrmlock_t *rrl, boolean_t track_all);
void rrm_destroy(rrmlock_t *rrl);
void rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag);
void rrm_enter_read(rrmlock_t *rrl, void *tag);
void rrm_enter_write(rrmlock_t *rrl);
void rrm_exit(rrmlock_t *rrl, void *tag);
boolean_t rrm_held(rrmlock_t *rrl, krw_t rw);
/*
* Convenience wrappers around rrm_held(): query for a reader hold, a
* writer hold, or either of the two.
*/
#define RRM_READ_HELD(x) rrm_held(x, RW_READER)
#define RRM_WRITE_HELD(x) rrm_held(x, RW_WRITER)
#define RRM_LOCK_HELD(x) \
(rrm_held(x, RW_WRITER) || rrm_held(x, RW_READER))
#ifdef __cplusplus
}
#endif
+1 -1
View File
@@ -67,7 +67,7 @@ typedef struct zfs_sb {
boolean_t z_atime; /* enable atimes mount option */
boolean_t z_relatime; /* enable relatime mount option */
boolean_t z_unmounted; /* unmounted */
rrwlock_t z_teardown_lock;
rrmlock_t z_teardown_lock;
krwlock_t z_teardown_inactive_lock;
list_t z_all_znodes; /* all znodes in the fs */
uint64_t z_nr_znodes; /* number of znodes in the fs */
+2 -2
View File
@@ -250,7 +250,7 @@ typedef struct znode {
/* Called on entry to each ZFS vnode and vfs operation */
#define ZFS_ENTER(zsb) \
{ \
rrw_enter_read(&(zsb)->z_teardown_lock, FTAG); \
rrm_enter_read(&(zsb)->z_teardown_lock, FTAG); \
if ((zsb)->z_unmounted) { \
ZFS_EXIT(zsb); \
return (EIO); \
@@ -260,7 +260,7 @@ typedef struct znode {
/* Must be called before exiting the vop */
#define ZFS_EXIT(zsb) \
{ \
rrw_exit(&(zsb)->z_teardown_lock, FTAG); \
rrm_exit(&(zsb)->z_teardown_lock, FTAG); \
}
/* Verifies the znode is valid */