More aggsum optimizations

- Avoid atomic_add() when updating as_lower_bound/as_upper_bound.
Previous code was excessively strong on 64bit systems while not
strong enough on 32bit ones.  Instead, introduce and use real
atomic_load() and atomic_store() operations, which are just plain
assignments on 64bit machines but use proper atomics on 32bit ones
to avoid torn reads/writes.

 - Reduce the number of buckets on large systems.  Extra buckets do
not improve add speed as much as they hurt reads.  Unlike wmsum, for
aggsum reads are still important.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-By: iXsystems, Inc.
Closes #12145
This commit is contained in:
Alexander Motin
2021-06-07 12:02:47 -04:00
committed by GitHub
parent e5e76bd643
commit ea400129c3
5 changed files with 129 additions and 63 deletions
+43
View File
@@ -245,6 +245,49 @@ extern ulong_t atomic_swap_ulong(volatile ulong_t *, ulong_t);
extern uint64_t atomic_swap_64(volatile uint64_t *, uint64_t);
#endif
/*
 * Atomically read variable.
 *
 * Each atomic_load_<type>(p) macro performs a single read of *p through
 * a volatile-qualified lvalue of the named type and evaluates to the
 * value read.  The macros expand to a plain volatile access only; they
 * do not themselves issue any memory barrier.
 *
 * atomic_load_ptr(p) derives the pointee type from the argument.  The
 * argument is parenthesized inside __typeof so that expressions such as
 * "cond ? &a : &b" are dereferenced as a whole rather than having the
 * unary '*' bind to their first operand.
 *
 * atomic_load_64() is a plain volatile read only when _LP64 is defined.
 * Otherwise, if _INT64_TYPE is defined, it is an external function so
 * that a 64-bit value is not read with a torn access.
 */
#define	atomic_load_char(p)	(*(volatile uchar_t *)(p))
#define	atomic_load_short(p)	(*(volatile ushort_t *)(p))
#define	atomic_load_int(p)	(*(volatile uint_t *)(p))
#define	atomic_load_long(p)	(*(volatile ulong_t *)(p))
#define	atomic_load_ptr(p)	(*(volatile __typeof(*(p)) *)(p))
#define	atomic_load_8(p)	(*(volatile uint8_t *)(p))
#define	atomic_load_16(p)	(*(volatile uint16_t *)(p))
#define	atomic_load_32(p)	(*(volatile uint32_t *)(p))
#ifdef _LP64
#define	atomic_load_64(p)	(*(volatile uint64_t *)(p))
#elif defined(_INT64_TYPE)
extern uint64_t atomic_load_64(volatile uint64_t *);
#endif
/*
 * Atomically write variable.
 *
 * Each atomic_store_<type>(p, v) macro converts v to the named type and
 * stores it to *p with a single write through a volatile-qualified
 * lvalue.  The macros expand to a plain volatile assignment only; they
 * do not themselves issue any memory barrier.
 *
 * atomic_store_ptr(p, v) derives the pointee type from the argument and
 * stores v without a cast.  The argument is parenthesized inside
 * __typeof so that expressions such as "cond ? &a : &b" are
 * dereferenced as a whole rather than having the unary '*' bind to
 * their first operand.
 *
 * atomic_store_64() is a plain volatile write only when _LP64 is
 * defined.  Otherwise, if _INT64_TYPE is defined, it is an external
 * function so that a 64-bit value is not written with a torn access.
 */
#define	atomic_store_char(p, v)		\
	(*(volatile uchar_t *)(p) = (uchar_t)(v))
#define	atomic_store_short(p, v)	\
	(*(volatile ushort_t *)(p) = (ushort_t)(v))
#define	atomic_store_int(p, v)		\
	(*(volatile uint_t *)(p) = (uint_t)(v))
#define	atomic_store_long(p, v)		\
	(*(volatile ulong_t *)(p) = (ulong_t)(v))
#define	atomic_store_ptr(p, v)		\
	(*(volatile __typeof(*(p)) *)(p) = (v))
#define	atomic_store_8(p, v)		\
	(*(volatile uint8_t *)(p) = (uint8_t)(v))
#define	atomic_store_16(p, v)		\
	(*(volatile uint16_t *)(p) = (uint16_t)(v))
#define	atomic_store_32(p, v)		\
	(*(volatile uint32_t *)(p) = (uint32_t)(v))
#ifdef _LP64
#define	atomic_store_64(p, v)		\
	(*(volatile uint64_t *)(p) = (uint64_t)(v))
#elif defined(_INT64_TYPE)
extern void atomic_store_64(volatile uint64_t *, uint64_t);
#endif
/*
* Perform an exclusive atomic bit set/clear on a target.
* Returns 0 if bit was successfully set/cleared, or -1