Use percpu_counter for obj_alloc counter of Linux-backed caches

A previous commit enabled the tracking of object allocations
in Linux-backed caches from the SPL layer for debuggability.
The commit is: 9a170fc6fe54f1e852b6c39630fe5ef2bbd97c16

Unfortunately, it also introduced minor performance regressions
that were highlighted by the ZFS perf test-suite. Within Delphix
we found that the regression ranged from -1% all the way up
to -8% for some workloads.

This commit brings performance back up to par by creating a
separate counter for those caches and making it a percpu counter
in order to avoid lock contention.

The initial performance testing was done by myself, and the
final round was conducted by @tonynguien, who was also the one
who discovered the regression and identified the culprit.

Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Closes #10397
This commit is contained in:
Serapheim Dimitropoulos
2020-06-26 18:06:50 -07:00
committed by GitHub
parent 7b232e9354
commit ec1fea4516
7 changed files with 100 additions and 8 deletions
+14 -6
View File
@@ -31,6 +31,7 @@
#include <sys/wait.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/percpu_compat.h>
#include <linux/prefetch.h>
/*
@@ -948,6 +949,13 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_obj_emergency = 0;
skc->skc_obj_emergency_max = 0;
rc = percpu_counter_init_common(&skc->skc_linux_alloc, 0,
GFP_KERNEL);
if (rc != 0) {
kfree(skc);
return (NULL);
}
/*
* Verify the requested alignment restriction is sane.
*/
@@ -1047,6 +1055,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
return (skc);
out:
kfree(skc->skc_name);
percpu_counter_destroy(&skc->skc_linux_alloc);
kfree(skc);
return (NULL);
}
@@ -1117,6 +1126,9 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
ASSERT3U(skc->skc_obj_emergency, ==, 0);
ASSERT(list_empty(&skc->skc_complete_list));
ASSERT3U(percpu_counter_sum(&skc->skc_linux_alloc), ==, 0);
percpu_counter_destroy(&skc->skc_linux_alloc);
spin_unlock(&skc->skc_lock);
kfree(skc->skc_name);
@@ -1473,9 +1485,7 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
* how many objects we've allocated in it for
* better debuggability.
*/
spin_lock(&skc->skc_lock);
skc->skc_obj_alloc++;
spin_unlock(&skc->skc_lock);
percpu_counter_inc(&skc->skc_linux_alloc);
}
goto ret;
}
@@ -1550,9 +1560,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
*/
if (skc->skc_flags & KMC_SLAB) {
kmem_cache_free(skc->skc_linux_cache, obj);
spin_lock(&skc->skc_lock);
skc->skc_obj_alloc--;
spin_unlock(&skc->skc_lock);
percpu_counter_dec(&skc->skc_linux_alloc);
return;
}