From d46630e0f316a39899fa8ee02365e015303f2a5d Mon Sep 17 00:00:00 2001
From: behlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
Date: Tue, 24 Jun 2008 17:18:15 +0000
Subject: [PATCH] The first locking issue was due to the semaphore I used.  I
 was trying to be overly clever and the context switch when the semaphore was
 busy was destroying performance.  Converting to a simple spin lock bought me a
 factor of 50 or so.  That said it's still not good enough.  Tests show bad
 performance and we are still CPU bound.  The logical fix is I need to
 implement per-cpu hot caches to minimize the SMP contention. Linux and
 Solaris both have this; I was hoping to do without, but it looks like that's
 not to be.

   kmem_lock: time (sec)        slabs           objs            hash
   kmem_lock:                   tot/max/calc    tot/max/calc    size/depth
   kmem_lock:  0.022000000      7/6/64  224/177/2048    32768/1
   kmem_lock:  0.039000000      13/13/128       416/404/4096    32768/1
   kmem_lock:  0.079000000      23/21/256       736/672/8192    32768/1
   kmem_lock:  0.158000000      48/47/512       1536/1504/16384 32768/1
   kmem_lock:  0.345000000      105/105/1024    3360/3358/32768 32768/2
   kmem_lock:  0.760000000      202/200/2048    6464/6400/65536 32768/3


git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@135 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
---
 include/sys/kmem.h         | 16 ++++++-------
 modules/spl/spl-kmem.c     | 48 +++++++++++++++++++++-----------------
 modules/splat/splat-kmem.c | 12 ++++++----
 3 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/include/sys/kmem.h b/include/sys/kmem.h
index e3810eb2f..fb0c22e3a 100644
--- a/include/sys/kmem.h
+++ b/include/sys/kmem.h
@@ -364,7 +364,7 @@ extern int kmem_set_warning(int flag);
 #define SKS_MAGIC			0x22222222
 #define SKC_MAGIC			0x2c2c2c2c
 
-#define SPL_KMEM_CACHE_HASH_BITS	12 /* 4k, sized for 1000's of objs */
+#define SPL_KMEM_CACHE_HASH_BITS	12
 #define SPL_KMEM_CACHE_HASH_ELTS	(1 << SPL_KMEM_CACHE_HASH_BITS)
 #define SPL_KMEM_CACHE_HASH_SIZE	(sizeof(struct hlist_head) * \
 					 SPL_KMEM_CACHE_HASH_ELTS)
@@ -417,16 +417,16 @@ typedef struct spl_kmem_cache {
         struct list_head	skc_list;	/* List of caches linkage */
 	struct list_head	skc_complete_list;/* Completely alloc'ed */
 	struct list_head	skc_partial_list; /* Partially alloc'ed */
-	struct rw_semaphore	skc_sem;	/* Cache semaphore */
+	spinlock_t		skc_lock;	/* Cache lock */
 	uint64_t		skc_slab_fail;	/* Slab alloc failures */
 	uint64_t		skc_slab_create;/* Slab creates */
 	uint64_t		skc_slab_destroy;/* Slab destroys */
-	uint64_t		skc_slab_total;	/* Slab total */
-	uint64_t		skc_slab_alloc; /* Slab alloc */
-	uint64_t		skc_slab_max;	/* Slab max */
-	uint64_t		skc_obj_total;	/* Obj total */
-	uint64_t		skc_obj_alloc;	/* Obj alloc */
-	uint64_t		skc_obj_max;	/* Obj max */
+	uint64_t		skc_slab_total;	/* Slab total current */
+	uint64_t		skc_slab_alloc; /* Slab alloc current */
+	uint64_t		skc_slab_max;	/* Slab max historic  */
+	uint64_t		skc_obj_total;	/* Obj total current */
+	uint64_t		skc_obj_alloc;	/* Obj alloc current */
+	uint64_t		skc_obj_max;	/* Obj max historic */
 	uint64_t		skc_hash_depth;	/* Hash depth */
 	uint64_t		skc_hash_max;	/* Hash depth max */
 } spl_kmem_cache_t;
diff --git a/modules/spl/spl-kmem.c b/modules/spl/spl-kmem.c
index 24d53a6c2..ec12aca21 100644
--- a/modules/spl/spl-kmem.c
+++ b/modules/spl/spl-kmem.c
@@ -221,7 +221,7 @@ out:
 }
 
 /* Removes slab from complete or partial list, so it must
- * be called with the 'skc->skc_sem' semaphore held.
+ * be called with the 'skc->skc_lock' held.
  *                         */
 static void
 slab_free(spl_kmem_slab_t *sks) {
@@ -236,9 +236,9 @@ slab_free(spl_kmem_slab_t *sks) {
 	skc->skc_obj_total -= sks->sks_objs;
 	skc->skc_slab_total--;
 
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-	ASSERT(rwsem_is_locked(&skc->skc_sem));
-#endif
+//#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+	ASSERT(spin_is_locked(&skc->skc_lock));
+//#endif
 
 	list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
 		ASSERT(sko->sko_magic == SKO_MAGIC);
@@ -267,9 +267,9 @@ __slab_reclaim(spl_kmem_cache_t *skc)
 	int rc = 0;
 	ENTRY;
 
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-	ASSERT(rwsem_is_locked(&skc->skc_sem));
-#endif
+//#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+	ASSERT(spin_is_locked(&skc->skc_lock));
+//#endif
 	/*
 	 * Free empty slabs which have not been touched in skc_delay
 	 * seconds.  This delay time is important to avoid thrashing.
@@ -296,9 +296,9 @@ slab_reclaim(spl_kmem_cache_t *skc)
 	int rc;
 	ENTRY;
 
-	down_write(&skc->skc_sem);
+	spin_lock(&skc->skc_lock);
 	rc = __slab_reclaim(skc);
-	up_write(&skc->skc_sem);
+	spin_unlock(&skc->skc_lock);
 
 	RETURN(rc);
 }
@@ -363,7 +363,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
 	INIT_LIST_HEAD(&skc->skc_list);
 	INIT_LIST_HEAD(&skc->skc_complete_list);
 	INIT_LIST_HEAD(&skc->skc_partial_list);
-	init_rwsem(&skc->skc_sem);
+	spin_lock_init(&skc->skc_lock);
         skc->skc_slab_fail = 0;
         skc->skc_slab_create = 0;
         skc->skc_slab_destroy = 0;
@@ -398,7 +398,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
         list_del_init(&skc->skc_list);
         up_write(&spl_kmem_cache_sem);
 
-	down_write(&skc->skc_sem);
+	spin_lock(&skc->skc_lock);
 
 	/* Validate there are no objects in use and free all the
 	 * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers.
@@ -411,7 +411,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
 	kmem_free(skc->skc_hash, skc->skc_hash_size);
 	kmem_free(skc->skc_name, skc->skc_name_size);
 	kmem_free(skc, sizeof(*skc));
-	up_write(&skc->skc_sem);
+	spin_unlock(&skc->skc_lock);
 
 	EXIT;
 }
@@ -441,7 +441,7 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
 	unsigned long key;
 	ENTRY;
 
-	down_write(&skc->skc_sem);
+	spin_lock(&skc->skc_lock);
 restart:
 	/* Check for available objects from the partial slabs */
 	if (!list_empty(&skc->skc_partial_list)) {
@@ -459,7 +459,7 @@ restart:
 		/* Remove from sks_free_list, add to used hash */
 		list_del_init(&sko->sko_list);
 		key = spl_hash_ptr(sko->sko_addr, skc->skc_hash_bits);
-		hlist_add_head_rcu(&sko->sko_hlist, &skc->skc_hash[key]);
+		hlist_add_head(&sko->sko_hlist, &skc->skc_hash[key]);
 
 		sks->sks_age = jiffies;
 		atomic_inc(&sks->sks_ref);
@@ -484,7 +484,7 @@ restart:
 		GOTO(out_lock, obj = sko->sko_addr);
 	}
 
-	up_write(&skc->skc_sem);
+	spin_unlock(&skc->skc_lock);
 
 	/* No available objects create a new slab.  Since this is an
 	 * expensive operation we do it without holding the semaphore
@@ -521,14 +521,14 @@ restart:
 	/* Link the newly created slab in to the skc_partial_list,
 	 * and retry the allocation which will now succeed.
 	 */
-	down_write(&skc->skc_sem);
+	spin_lock(&skc->skc_lock);
 	skc->skc_slab_total++;
 	skc->skc_obj_total += sks->sks_objs;
 	list_add_tail(&sks->sks_list, &skc->skc_partial_list);
 	GOTO(restart, obj = NULL);
 
 out_lock:
-	up_write(&skc->skc_sem);
+	spin_unlock(&skc->skc_lock);
 out:
 	RETURN(obj);
 }
@@ -537,16 +537,20 @@ EXPORT_SYMBOL(spl_kmem_cache_alloc);
 void
 spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
 {
-        struct hlist_head *head;
         struct hlist_node *node;
         spl_kmem_slab_t *sks = NULL;
 	spl_kmem_obj_t *sko = NULL;
+	unsigned long key = spl_hash_ptr(obj, skc->skc_hash_bits);
+	int i = 0;
 	ENTRY;
 
-	down_write(&skc->skc_sem);
+	spin_lock(&skc->skc_lock);
+
+        hlist_for_each_entry(sko, node, &skc->skc_hash[key], sko_hlist) {
+
+		if (unlikely((++i) > skc->skc_hash_depth))
+			skc->skc_hash_depth = i;
 
-        head = &skc->skc_hash[spl_hash_ptr(obj, skc->skc_hash_bits)];
-        hlist_for_each_entry_rcu(sko, node, head, sko_hlist) {
                 if (sko->sko_addr == obj) {
 			ASSERT(sko->sko_magic == SKO_MAGIC);
 			sks = sko->sko_slab;
@@ -583,7 +587,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
 	}
 
 	__slab_reclaim(skc);
-	up_write(&skc->skc_sem);
+	spin_unlock(&skc->skc_lock);
 }
 EXPORT_SYMBOL(spl_kmem_cache_free);
 
diff --git a/modules/splat/splat-kmem.c b/modules/splat/splat-kmem.c
index 0d774231d..51fa6f0ef 100644
--- a/modules/splat/splat-kmem.c
+++ b/modules/splat/splat-kmem.c
@@ -584,11 +584,11 @@ splat_kmem_test8(struct file *file, void *arg)
 	kcp.kcp_file = file;
 
         splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%s",
-	             "time (sec)\tslabs       \tobjs\n");
+	             "time (sec)\tslabs       \tobjs        \thash\n");
         splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%s",
-	             "          \ttot/max/calc\ttot/max/calc\n");
+	             "          \ttot/max/calc\ttot/max/calc\tsize/depth\n");
 
-	for (alloc = 64; alloc <= 1024; alloc *= 2) {
+	for (alloc = 64; alloc <= 4096; alloc *= 2) {
 		kcp.kcp_size = 256;
 		kcp.kcp_count = 0;
 		kcp.kcp_threads = 0;
@@ -625,14 +625,16 @@ splat_kmem_test8(struct file *file, void *arg)
 		delta = timespec_sub(stop, start);
 
 	        splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%2ld.%09ld\t"
-			     "%lu/%lu/%lu\t%lu/%lu/%lu\n",
+			     "%lu/%lu/%lu\t%lu/%lu/%lu\t%lu/%lu\n",
 			     delta.tv_sec, delta.tv_nsec,
 			     (unsigned long)kcp.kcp_cache->skc_slab_total,
 			     (unsigned long)kcp.kcp_cache->skc_slab_max,
 			     (unsigned long)(kcp.kcp_alloc * 32 / SPL_KMEM_CACHE_OBJ_PER_SLAB),
 			     (unsigned long)kcp.kcp_cache->skc_obj_total,
 			     (unsigned long)kcp.kcp_cache->skc_obj_max,
-			     (unsigned long)(kcp.kcp_alloc * 32));
+			     (unsigned long)(kcp.kcp_alloc * 32),
+			     (unsigned long)kcp.kcp_cache->skc_hash_size,
+			     (unsigned long)kcp.kcp_cache->skc_hash_depth);
 
 		kmem_cache_destroy(kcp.kcp_cache);