mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Use percpu_counter for obj_alloc counter of Linux-backed caches
A previous commit (9a170fc6fe54f1e852b6c39630fe5ef2bbd97c16) enabled the tracking of object allocations in Linux-backed caches from the SPL layer for debuggability. Unfortunately, it also introduced minor performance regressions that were highlighted by the ZFS perf test-suite. Within Delphix we found that the regression ranged from -1% all the way up to -8% for some workloads. This commit brings performance back up to par by creating a separate counter for those caches and making it a percpu counter in order to avoid lock contention. The initial performance testing was done by myself, and the final round was conducted by @tonynguien, who was also the one who discovered the regression and highlighted the culprit. Reviewed-by: Matt Ahrens <matt@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Serapheim Dimitropoulos <serapheim@delphix.com> Closes #10397
This commit is contained in:
		
							parent
							
								
									7b232e9354
								
							
						
					
					
						commit
						ec1fea4516
					
				
							
								
								
									
										34
									
								
								config/kernel-percpu.m4
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								config/kernel-percpu.m4
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,34 @@
 | 
			
		||||
dnl #
 | 
			
		||||
dnl # 3.18 API change,
 | 
			
		||||
dnl # The function percpu_counter_init now must be passed a GFP mask.
 | 
			
		||||
dnl #
 | 
			
		||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT], [
 | 
			
		||||
	ZFS_LINUX_TEST_SRC([percpu_counter_init_with_gfp], [
 | 
			
		||||
		#include <linux/gfp.h>
 | 
			
		||||
		#include <linux/percpu_counter.h>
 | 
			
		||||
	],[
 | 
			
		||||
		struct percpu_counter counter;
 | 
			
		||||
		int error;
 | 
			
		||||
 | 
			
		||||
		error = percpu_counter_init(&counter, 0, GFP_KERNEL);
 | 
			
		||||
	])
 | 
			
		||||
])
 | 
			
		||||
 | 
			
		||||
AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_INIT], [
 | 
			
		||||
	AC_MSG_CHECKING([whether percpu_counter_init() wants gfp_t])
 | 
			
		||||
	ZFS_LINUX_TEST_RESULT([percpu_counter_init_with_gfp], [
 | 
			
		||||
		AC_MSG_RESULT(yes)
 | 
			
		||||
		AC_DEFINE(HAVE_PERCPU_COUNTER_INIT_WITH_GFP, 1,
 | 
			
		||||
		    [percpu_counter_init() wants gfp_t])
 | 
			
		||||
	],[
 | 
			
		||||
		AC_MSG_RESULT(no)
 | 
			
		||||
	])
 | 
			
		||||
])
 | 
			
		||||
 | 
			
		||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU], [
 | 
			
		||||
	ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT
 | 
			
		||||
])
 | 
			
		||||
 | 
			
		||||
AC_DEFUN([ZFS_AC_KERNEL_PERCPU], [
 | 
			
		||||
	ZFS_AC_KERNEL_PERCPU_COUNTER_INIT
 | 
			
		||||
])
 | 
			
		||||
@ -121,6 +121,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 | 
			
		||||
	ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC
 | 
			
		||||
	ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES
 | 
			
		||||
	ZFS_AC_KERNEL_SRC_KSTRTOUL
 | 
			
		||||
	ZFS_AC_KERNEL_SRC_PERCPU
 | 
			
		||||
 | 
			
		||||
	AC_MSG_CHECKING([for available kernel interfaces])
 | 
			
		||||
	ZFS_LINUX_TEST_COMPILE_ALL([kabi])
 | 
			
		||||
@ -216,6 +217,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 | 
			
		||||
	ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC
 | 
			
		||||
	ZFS_AC_KERNEL_TOTALHIGH_PAGES
 | 
			
		||||
	ZFS_AC_KERNEL_KSTRTOUL
 | 
			
		||||
	ZFS_AC_KERNEL_PERCPU
 | 
			
		||||
])
 | 
			
		||||
 | 
			
		||||
dnl #
 | 
			
		||||
 | 
			
		||||
@ -5,6 +5,7 @@ KERNEL_H = \
 | 
			
		||||
	blkdev_compat.h \
 | 
			
		||||
	utsname_compat.h \
 | 
			
		||||
	kmap_compat.h \
 | 
			
		||||
	percpu_compat.h \
 | 
			
		||||
	simd.h \
 | 
			
		||||
	simd_x86.h \
 | 
			
		||||
	simd_aarch64.h \
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										44
									
								
								include/os/linux/kernel/linux/percpu_compat.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								include/os/linux/kernel/linux/percpu_compat.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,44 @@
 | 
			
		||||
/*
 | 
			
		||||
 * CDDL HEADER START
 | 
			
		||||
 *
 | 
			
		||||
 * The contents of this file are subject to the terms of the
 | 
			
		||||
 * Common Development and Distribution License (the "License").
 | 
			
		||||
 * You may not use this file except in compliance with the License.
 | 
			
		||||
 *
 | 
			
		||||
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 | 
			
		||||
 * or http://www.opensolaris.org/os/licensing.
 | 
			
		||||
 * See the License for the specific language governing permissions
 | 
			
		||||
 * and limitations under the License.
 | 
			
		||||
 *
 | 
			
		||||
 * When distributing Covered Code, include this CDDL HEADER in each
 | 
			
		||||
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 | 
			
		||||
 * If applicable, add the following below this CDDL HEADER, with the
 | 
			
		||||
 * fields enclosed by brackets "[]" replaced with your own identifying
 | 
			
		||||
 * information: Portions Copyright [yyyy] [name of copyright owner]
 | 
			
		||||
 *
 | 
			
		||||
 * CDDL HEADER END
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (c) 2020 by Delphix. All rights reserved.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef _ZFS_PERCPU_H
 | 
			
		||||
#define	_ZFS_PERCPU_H
 | 
			
		||||
 | 
			
		||||
#include <linux/percpu_counter.h>
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * 3.18 API change,
 | 
			
		||||
 * percpu_counter_init() now must be passed a gfp mask which will be
 | 
			
		||||
 * used for the dynamic allocation of the actual counter.
 | 
			
		||||
 */
 | 
			
		||||
#ifdef HAVE_PERCPU_COUNTER_INIT_WITH_GFP
 | 
			
		||||
#define	percpu_counter_init_common(counter, n, gfp) \
 | 
			
		||||
	percpu_counter_init(counter, n, gfp)
 | 
			
		||||
#else
 | 
			
		||||
#define	percpu_counter_init_common(counter, n, gfp) \
 | 
			
		||||
	percpu_counter_init(counter, n)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif /* _ZFS_PERCPU_H */
 | 
			
		||||
@ -202,6 +202,7 @@ typedef struct spl_kmem_cache {
 | 
			
		||||
	uint64_t		skc_slab_max;	/* Slab max historic  */
 | 
			
		||||
	uint64_t		skc_obj_total;	/* Obj total current */
 | 
			
		||||
	uint64_t		skc_obj_alloc;	/* Obj alloc current */
 | 
			
		||||
	struct percpu_counter	skc_linux_alloc;   /* Linux-backed Obj alloc  */
 | 
			
		||||
	uint64_t		skc_obj_max;	/* Obj max historic */
 | 
			
		||||
	uint64_t		skc_obj_deadlock;  /* Obj emergency deadlocks */
 | 
			
		||||
	uint64_t		skc_obj_emergency; /* Obj emergency current */
 | 
			
		||||
 | 
			
		||||
@ -31,6 +31,7 @@
 | 
			
		||||
#include <sys/wait.h>
 | 
			
		||||
#include <linux/slab.h>
 | 
			
		||||
#include <linux/swap.h>
 | 
			
		||||
#include <linux/percpu_compat.h>
 | 
			
		||||
#include <linux/prefetch.h>
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
@ -948,6 +949,13 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
 | 
			
		||||
	skc->skc_obj_emergency = 0;
 | 
			
		||||
	skc->skc_obj_emergency_max = 0;
 | 
			
		||||
 | 
			
		||||
	rc = percpu_counter_init_common(&skc->skc_linux_alloc, 0,
 | 
			
		||||
	    GFP_KERNEL);
 | 
			
		||||
	if (rc != 0) {
 | 
			
		||||
		kfree(skc);
 | 
			
		||||
		return (NULL);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Verify the requested alignment restriction is sane.
 | 
			
		||||
	 */
 | 
			
		||||
@ -1047,6 +1055,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
 | 
			
		||||
	return (skc);
 | 
			
		||||
out:
 | 
			
		||||
	kfree(skc->skc_name);
 | 
			
		||||
	percpu_counter_destroy(&skc->skc_linux_alloc);
 | 
			
		||||
	kfree(skc);
 | 
			
		||||
	return (NULL);
 | 
			
		||||
}
 | 
			
		||||
@ -1117,6 +1126,9 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
 | 
			
		||||
	ASSERT3U(skc->skc_obj_emergency, ==, 0);
 | 
			
		||||
	ASSERT(list_empty(&skc->skc_complete_list));
 | 
			
		||||
 | 
			
		||||
	ASSERT3U(percpu_counter_sum(&skc->skc_linux_alloc), ==, 0);
 | 
			
		||||
	percpu_counter_destroy(&skc->skc_linux_alloc);
 | 
			
		||||
 | 
			
		||||
	spin_unlock(&skc->skc_lock);
 | 
			
		||||
 | 
			
		||||
	kfree(skc->skc_name);
 | 
			
		||||
@ -1473,9 +1485,7 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
 | 
			
		||||
			 * how many objects we've allocated in it for
 | 
			
		||||
			 * better debuggability.
 | 
			
		||||
			 */
 | 
			
		||||
			spin_lock(&skc->skc_lock);
 | 
			
		||||
			skc->skc_obj_alloc++;
 | 
			
		||||
			spin_unlock(&skc->skc_lock);
 | 
			
		||||
			percpu_counter_inc(&skc->skc_linux_alloc);
 | 
			
		||||
		}
 | 
			
		||||
		goto ret;
 | 
			
		||||
	}
 | 
			
		||||
@ -1550,9 +1560,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
 | 
			
		||||
	 */
 | 
			
		||||
	if (skc->skc_flags & KMC_SLAB) {
 | 
			
		||||
		kmem_cache_free(skc->skc_linux_cache, obj);
 | 
			
		||||
		spin_lock(&skc->skc_lock);
 | 
			
		||||
		skc->skc_obj_alloc--;
 | 
			
		||||
		spin_unlock(&skc->skc_lock);
 | 
			
		||||
		percpu_counter_dec(&skc->skc_linux_alloc);
 | 
			
		||||
		return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -446,16 +446,18 @@ slab_seq_show(struct seq_file *f, void *p)
 | 
			
		||||
		 * the underlying Linux cache please refer to /proc/slabinfo.
 | 
			
		||||
		 */
 | 
			
		||||
		spin_lock(&skc->skc_lock);
 | 
			
		||||
		uint64_t objs_allocated =
 | 
			
		||||
		    percpu_counter_sum(&skc->skc_linux_alloc);
 | 
			
		||||
		seq_printf(f, "%-36s  ", skc->skc_name);
 | 
			
		||||
		seq_printf(f, "0x%05lx %9s %9lu %8s %8u  "
 | 
			
		||||
		    "%5s %5s %5s  %5s %5lu %5s  %5s %5s %5s\n",
 | 
			
		||||
		    (long unsigned)skc->skc_flags,
 | 
			
		||||
		    "-",
 | 
			
		||||
		    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
 | 
			
		||||
		    (long unsigned)(skc->skc_obj_size * objs_allocated),
 | 
			
		||||
		    "-",
 | 
			
		||||
		    (unsigned)skc->skc_obj_size,
 | 
			
		||||
		    "-", "-", "-", "-",
 | 
			
		||||
		    (long unsigned)skc->skc_obj_alloc,
 | 
			
		||||
		    (long unsigned)objs_allocated,
 | 
			
		||||
		    "-", "-", "-", "-");
 | 
			
		||||
		spin_unlock(&skc->skc_lock);
 | 
			
		||||
		return (0);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user