From e11d6c5f50ff1cb9a75f5c6a6895846f73564422 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 17 Mar 2009 12:16:31 -0700 Subject: [PATCH] FC10/i686 Compatibility Update (2.6.27.19-170.2.35.fc10.i686) In the interests of portability I have added an FC10/i686 box to my list of development platforms. The hope is this will allow me to keep current with upstream kernel API changes, and at the same time ensure I don't accidentally break x86 support. This patch resolves all remaining issues observed under that environment. 1) SPL_AC_ZONE_STAT_ITEM_FIA autoconf check added. As of 2.6.21 the kernel added a clean API for modules to get the global count for free, inactive, and active pages. The SPL attempts to detect if this API is available and directly map spl_global_page_state() to global_page_state(). If the full API is not available then spl_global_page_state() is implemented as a thin layer to get these values via get_zone_counts() if that symbol is available. 2) New kmem:vmem_size regression test added to validate correct vmem_size() functionality. The test case acquires the current global vmem state, allocates from the vmem region, then verifies the allocation is correctly reflected in the vmem_size() stats. 3) Change splat_kmem_cache_thread_test() to always use KMC_KMEM based memory. On x86 systems with limited virtual address space, failures resulted due to exhausting the address space. The tests really need to probe exhausting all memory on the system, thus we need to use the physical address space. 4) Change kmem:slab_lock to cap its memory usage at availrmem instead of using the native Linux nr_free_pages(). This provides additional test coverage of the SPL Linux VM integration. 5) Change kmem:slab_overcommit to perform allocation of 256K instead of 1M. On x86 based systems it is not possible to create a kmem backed slab with entries of that size. To compensate for this the number of allocations performed is increased by 4x. 
6) Additional autoconf documentation for proposed upstream API changes to make additional symbols available to modules. 7) Console error messages added when spl_kallsyms_lookup_name() fails to locate an expected symbol. This causes the module to fail to load and we need to know exactly which symbol was not available. --- config/spl-build.m4 | 53 ++++++++++++++++++-- configure | 68 ++++++++++++++++++++++++++ configure.ac | 1 + include/sys/vmsystm.h | 17 +++++-- module/spl/spl-kmem.c | 65 ++++++++++++++++++++----- module/splat/splat-kmem.c | 100 ++++++++++++++++++++++++++++++++++---- spl_config.h.in | 3 ++ 7 files changed, 277 insertions(+), 30 deletions(-) diff --git a/config/spl-build.m4 b/config/spl-build.m4 index d0cf86d9a..1013e1f4c 100644 --- a/config/spl-build.m4 +++ b/config/spl-build.m4 @@ -752,7 +752,11 @@ AC_DEFUN([SPL_AC_KALLSYMS_LOOKUP_NAME], [ ]) dnl # -dnl # Symbol only available in custom kernels +dnl # Proposed API change, +dnl # This symbol is not available in stock kernels. You may build a +dnl # custom kernel with the *-spl-export-symbols.patch which will export +dnl # these symbols for use. If your already rolling a custom kernel for +dnl # your environment this is recommended. dnl # AC_DEFUN([SPL_AC_GET_VMALLOC_INFO], [ SPL_CHECK_SYMBOL_EXPORT( @@ -764,7 +768,11 @@ AC_DEFUN([SPL_AC_GET_VMALLOC_INFO], [ ]) dnl # -dnl # Symbol only available in custom kernels +dnl # Proposed API change, +dnl # This symbol is not available in stock kernels. You may build a +dnl # custom kernel with the *-spl-export-symbols.patch which will export +dnl # these symbols for use. If your already rolling a custom kernel for +dnl # your environment this is recommended. dnl # AC_DEFUN([SPL_AC_FIRST_ONLINE_PGDAT], [ SPL_CHECK_SYMBOL_EXPORT( @@ -776,7 +784,11 @@ AC_DEFUN([SPL_AC_FIRST_ONLINE_PGDAT], [ ]) dnl # -dnl # Symbol only available in custom kernels +dnl # Proposed API change, +dnl # This symbol is not available in stock kernels. 
You may build a +dnl # custom kernel with the *-spl-export-symbols.patch which will export +dnl # these symbols for use. If your already rolling a custom kernel for +dnl # your environment this is recommended. dnl # AC_DEFUN([SPL_AC_NEXT_ONLINE_PGDAT], [ SPL_CHECK_SYMBOL_EXPORT( @@ -788,7 +800,11 @@ AC_DEFUN([SPL_AC_NEXT_ONLINE_PGDAT], [ ]) dnl # -dnl # Symbol only available in custom kernels +dnl # Proposed API change, +dnl # This symbol is not available in stock kernels. You may build a +dnl # custom kernel with the *-spl-export-symbols.patch which will export +dnl # these symbols for use. If your already rolling a custom kernel for +dnl # your environment this is recommended. dnl # AC_DEFUN([SPL_AC_NEXT_ZONE], [ SPL_CHECK_SYMBOL_EXPORT( @@ -800,7 +816,11 @@ AC_DEFUN([SPL_AC_NEXT_ZONE], [ ]) dnl # -dnl # Symbol only available in custom kernels +dnl # Proposed API change, +dnl # This symbol is not available in stock kernels. You may build a +dnl # custom kernel with the *-spl-export-symbols.patch which will export +dnl # these symbols for use. If your already rolling a custom kernel for +dnl # your environment this is recommended. dnl # AC_DEFUN([SPL_AC_GET_ZONE_COUNTS], [ SPL_CHECK_SYMBOL_EXPORT( @@ -810,3 +830,26 @@ AC_DEFUN([SPL_AC_GET_ZONE_COUNTS], [ [get_zone_counts() is available])], []) ]) + +dnl # +dnl # 2.6.21 API change, +dnl # Public global zone stats now include free/inactive/active page +dnl # counts. This replaced the priviate get_zone_counts() interface. 
+dnl # +AC_DEFUN([SPL_AC_ZONE_STAT_ITEM_FIA], [ + AC_MSG_CHECKING([whether free/inactive/active page state is available]) + SPL_LINUX_TRY_COMPILE([ + #include + ],[ + enum zone_stat_item i1, i2, i3; + i1 = NR_FREE_PAGES; + i2 = NR_INACTIVE; + i3 = NR_ACTIVE; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_ZONE_STAT_ITEM_FIA, 1, + [free/inactive/active page state is available]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/configure b/configure index 0467b602d..f3deee9c8 100755 --- a/configure +++ b/configure @@ -20872,6 +20872,74 @@ _ACEOF + echo "$as_me:$LINENO: checking whether free/inactive/active page state is available" >&5 +echo $ECHO_N "checking whether free/inactive/active page state is available... $ECHO_C" >&6 + + +cat >conftest.c <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + + #include + +int +main (void) +{ + + enum zone_stat_item i1, i2, i3; + i1 = NR_FREE_PAGES; + i2 = NR_INACTIVE; + i3 = NR_ACTIVE; + + ; + return 0; +} + +_ACEOF + + + rm -Rf build && mkdir -p build + echo "obj-m := conftest.o" >build/Makefile + if { ac_try='cp conftest.c build && make modules CC="$CC" LINUXINCLUDE="-Iinclude -Iinclude2 -I$LINUX/include -include include/linux/autoconf.h" -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } >/dev/null && { ac_try='test -s build/conftest.o' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ZONE_STAT_ITEM_FIA 1 +_ACEOF + + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + + + +fi + + rm -Rf build + + + + ac_config_files="$ac_config_files Makefile lib/Makefile cmd/Makefile module/Makefile module/spl/Makefile module/splat/Makefile include/Makefile scripts/Makefile spl.spec" diff --git a/configure.ac b/configure.ac index 623d54dd5..bdec31c4b 100644 --- a/configure.ac +++ b/configure.ac @@ -74,6 +74,7 @@ SPL_AC_FIRST_ONLINE_PGDAT SPL_AC_NEXT_ONLINE_PGDAT SPL_AC_NEXT_ZONE SPL_AC_GET_ZONE_COUNTS +SPL_AC_ZONE_STAT_ITEM_FIA AC_CONFIG_FILES([ Makefile diff --git a/include/sys/vmsystm.h b/include/sys/vmsystm.h index 861e51164..123005e5e 100644 --- a/include/sys/vmsystm.h +++ b/include/sys/vmsystm.h @@ -115,13 +115,24 @@ extern next_zone_t next_zone_fn; #endif /* HAVE_NEXT_ZONE */ /* Source linux/mm/vmstat.c */ -#ifndef HAVE_GET_ZONE_COUNTS +#ifndef HAVE_ZONE_STAT_ITEM_FIA +# ifndef HAVE_GET_ZONE_COUNTS typedef void (*get_zone_counts_t)(unsigned long *, unsigned long *, unsigned long *); extern get_zone_counts_t get_zone_counts_fn; -#define get_zone_counts(a,i,f) get_zone_counts_fn(a,i,f) -#endif /* HAVE_GET_ZONE_COUNTS */ +# define get_zone_counts(a,i,f) get_zone_counts_fn(a,i,f) +extern unsigned long spl_global_page_state(int); +/* Defines designed to simulate enum but large enough to ensure no overlap */ +# define NR_FREE_PAGES 0x8001 +# define NR_INACTIVE 0x8002 +# define NR_ACTIVE 0x8003 +# else +# error "HAVE_ZONE_STAT_ITEM_FIA and HAVE_GET_ZONE_COUNTS unavailable" +# endif /* HAVE_GET_ZONE_COUNTS */ +#else +#define spl_global_page_state(item) global_page_state(item) +#endif /* HAVE_ZONE_STAT_ITEM_FIA */ #define xcopyin(from, to, size) copy_from_user(to, from, size) #define xcopyout(from, to, size) 
copy_to_user(to, from, size) diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c index 944300bb4..6723dcd08 100644 --- a/module/spl/spl-kmem.c +++ b/module/spl/spl-kmem.c @@ -99,22 +99,47 @@ next_zone_t next_zone_fn = NULL; EXPORT_SYMBOL(next_zone_fn); #endif /* HAVE_NEXT_ZONE */ -#ifndef HAVE_GET_ZONE_COUNTS +#ifndef HAVE_ZONE_STAT_ITEM_FIA +# ifndef HAVE_GET_ZONE_COUNTS get_zone_counts_t get_zone_counts_fn = NULL; EXPORT_SYMBOL(get_zone_counts_fn); -#endif /* HAVE_GET_ZONE_COUNTS */ -pgcnt_t -spl_kmem_availrmem(void) +unsigned long +spl_global_page_state(int item) { unsigned long active; unsigned long inactive; unsigned long free; - get_zone_counts(&active, &inactive, &free); + if (item == NR_FREE_PAGES) { + get_zone_counts(&active, &inactive, &free); + return free; + } + if (item == NR_INACTIVE) { + get_zone_counts(&active, &inactive, &free); + return inactive; + } + + if (item == NR_ACTIVE) { + get_zone_counts(&active, &inactive, &free); + return active; + } + + return global_page_state((enum zone_stat_item)item); +} +EXPORT_SYMBOL(spl_global_page_state); +# else +# error "HAVE_ZONE_STAT_ITEM_FIA and HAVE_GET_ZONE_COUNTS unavailable" +# endif /* HAVE_GET_ZONE_COUNTS */ +#endif /* HAVE_ZONE_STAT_ITEM_FIA */ + +pgcnt_t +spl_kmem_availrmem(void) +{ /* The amount of easily available memory */ - return free + inactive; + return (spl_global_page_state(NR_FREE_PAGES) + + spl_global_page_state(NR_INACTIVE)); } EXPORT_SYMBOL(spl_kmem_availrmem); @@ -1773,37 +1798,51 @@ spl_kmem_init_kallsyms_lookup(void) #ifndef HAVE_GET_VMALLOC_INFO get_vmalloc_info_fn = (get_vmalloc_info_t) spl_kallsyms_lookup_name("get_vmalloc_info"); - if (!get_vmalloc_info_fn) + if (!get_vmalloc_info_fn) { + printk(KERN_ERR "Error: Unknown symbol get_vmalloc_info\n"); return -EFAULT; + } #endif /* HAVE_GET_VMALLOC_INFO */ #ifndef HAVE_FIRST_ONLINE_PGDAT first_online_pgdat_fn = (first_online_pgdat_t) spl_kallsyms_lookup_name("first_online_pgdat"); - if (!first_online_pgdat_fn) + if 
(!first_online_pgdat_fn) { + printk(KERN_ERR "Error: Unknown symbol first_online_pgdat\n"); return -EFAULT; + } #endif /* HAVE_FIRST_ONLINE_PGDAT */ #ifndef HAVE_NEXT_ONLINE_PGDAT next_online_pgdat_fn = (next_online_pgdat_t) spl_kallsyms_lookup_name("next_online_pgdat"); - if (!next_online_pgdat_fn) + if (!next_online_pgdat_fn) { + printk(KERN_ERR "Error: Unknown symbol next_online_pgdat\n"); return -EFAULT; + } #endif /* HAVE_NEXT_ONLINE_PGDAT */ #ifndef HAVE_NEXT_ZONE next_zone_fn = (next_zone_t) spl_kallsyms_lookup_name("next_zone"); - if (!next_zone_fn) + if (!next_zone_fn) { + printk(KERN_ERR "Error: Unknown symbol next_zone\n"); return -EFAULT; + } #endif /* HAVE_NEXT_ZONE */ -#ifndef HAVE_GET_ZONE_COUNTS +#ifndef HAVE_ZONE_STAT_ITEM_FIA +# ifndef HAVE_GET_ZONE_COUNTS get_zone_counts_fn = (get_zone_counts_t) spl_kallsyms_lookup_name("get_zone_counts"); - if (!get_zone_counts_fn) + if (!get_zone_counts_fn) { + printk(KERN_ERR "Error: Unknown symbol get_zone_counts\n"); return -EFAULT; -#endif /* HAVE_GET_ZONE_COUNTS */ + } +# else +# error "HAVE_ZONE_STAT_ITEM_FIA and HAVE_GET_ZONE_COUNTS unavailable" +# endif /* HAVE_GET_ZONE_COUNTS */ +#endif /* HAVE_ZONE_STAT_ITEM_FIA */ /* * It is now safe to initialize the global tunings which rely on diff --git a/module/splat/splat-kmem.c b/module/splat/splat-kmem.c index 35718e2f8..f12cd34b3 100644 --- a/module/splat/splat-kmem.c +++ b/module/splat/splat-kmem.c @@ -74,6 +74,10 @@ #define SPLAT_KMEM_TEST11_NAME "slab_overcommit" #define SPLAT_KMEM_TEST11_DESC "Slab memory overcommit test" +#define SPLAT_KMEM_TEST12_ID 0x010c +#define SPLAT_KMEM_TEST12_NAME "vmem_size" +#define SPLAT_KMEM_TEST12_DESC "Memory zone test" + #define SPLAT_KMEM_ALLOC_COUNT 10 #define SPLAT_VMEM_ALLOC_COUNT 10 @@ -652,7 +656,7 @@ splat_kmem_cache_thread_test(struct file *file, void *arg, char *name, splat_kmem_cache_test_constructor, splat_kmem_cache_test_destructor, splat_kmem_cache_test_reclaim, - kcp, NULL, KMC_VMEM); + kcp, NULL, KMC_KMEM); 
if (!kcp->kcp_cache) { splat_vprint(file, name, "Unable to create '%s'\n", cache_name); rc = -ENOMEM; @@ -973,9 +977,8 @@ splat_kmem_test9(struct file *file, void *arg) static int splat_kmem_test10(struct file *file, void *arg) { - uint64_t size, alloc, free_mem, rc = 0; + uint64_t size, alloc, rc = 0; - free_mem = nr_free_pages() * PAGE_SIZE; for (size = 16; size <= 1024*1024; size *= 2) { splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "name", @@ -985,8 +988,9 @@ splat_kmem_test10(struct file *file, void *arg) for (alloc = 1; alloc <= 1024; alloc *= 2) { - /* Skip tests which exceed free memory */ - if (size * alloc * SPLAT_KMEM_THREADS > free_mem / 2) + /* Skip tests which exceed available memory. We + * leverage availrmem here for some extra testing */ + if (size * alloc * SPLAT_KMEM_THREADS > availrmem / 2) continue; rc = splat_kmem_cache_thread_test(file, arg, @@ -1014,12 +1018,12 @@ splat_kmem_test11(struct file *file, void *arg) { uint64_t size, alloc, rc; - size = 1024*1024; - alloc = ((4 * num_physpages * PAGE_SIZE) / size) / SPLAT_KMEM_THREADS; + size = 256*1024; + alloc = ((4 * physmem * PAGE_SIZE) / size) / SPLAT_KMEM_THREADS; - splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "name", + splat_vprint(file, SPLAT_KMEM_TEST11_NAME, "%-22s %s", "name", "time (sec)\tslabs \tobjs \thash\n"); - splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "", + splat_vprint(file, SPLAT_KMEM_TEST11_NAME, "%-22s %s", "", " \ttot/max/calc\ttot/max/calc\n"); rc = splat_kmem_cache_thread_test(file, arg, @@ -1028,6 +1032,81 @@ splat_kmem_test11(struct file *file, void *arg) return rc; } +/* + * Check vmem_size() behavior by acquiring the alloc/free/total vmem + * space, then allocate a known buffer size from vmem space. We can + * then check that vmem_size() values were updated properly with in + * a fairly small tolerence. The tolerance is important because we + * are not the only vmem consumer on the system. 
Other unrelated + * allocations might occur during the small test window. The vmem + * allocation itself may also add in a little extra private space to + * the buffer. Finally, verify total space always remains unchanged. + */ +static int +splat_kmem_test12(struct file *file, void *arg) +{ + ssize_t alloc1, free1, total1; + ssize_t alloc2, free2, total2; + int size = 8*1024*1024; + void *ptr; + + alloc1 = vmem_size(NULL, VMEM_ALLOC); + free1 = vmem_size(NULL, VMEM_FREE); + total1 = vmem_size(NULL, VMEM_ALLOC | VMEM_FREE); + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Vmem alloc=%d free=%d " + "total=%d\n", (int)alloc1, (int)free1, (int)total1); + + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Alloc %d bytes\n", size); + ptr = vmem_alloc(size, KM_SLEEP); + if (!ptr) { + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "Failed to alloc %d bytes\n", size); + return -ENOMEM; + } + + alloc2 = vmem_size(NULL, VMEM_ALLOC); + free2 = vmem_size(NULL, VMEM_FREE); + total2 = vmem_size(NULL, VMEM_ALLOC | VMEM_FREE); + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Vmem alloc=%d free=%d " + "total=%d\n", (int)alloc2, (int)free2, (int)total2); + + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Free %d bytes\n", size); + vmem_free(ptr, size); + if (alloc2 < (alloc1 + size - (size / 100)) || + alloc2 > (alloc1 + size + (size / 100))) { + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "Failed VMEM_ALLOC size: %d != %d+%d (+/- 1%%)\n", + (int)alloc2, (int)alloc1, size); + return -ERANGE; + } + + if (free2 < (free1 - size - (size / 100)) || + free2 > (free1 - size + (size / 100))) { + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "Failed VMEM_FREE size: %d != %d-%d (+/- 1%%)\n", + (int)free2, (int)free1, size); + return -ERANGE; + } + + if (total1 != total2) { + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "Failed VMEM_ALLOC | VMEM_FREE not constant: " + "%d != %d\n", (int)total2, (int)total1); + return -ERANGE; + } + + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "VMEM_ALLOC within tolerance: 
~%d%% (%d/%d)\n", + (int)(((alloc1 + size) - alloc2) * 100 / size), + (int)((alloc1 + size) - alloc2), size); + splat_vprint(file, SPLAT_KMEM_TEST12_NAME, + "VMEM_FREE within tolerance: ~%d%% (%d/%d)\n", + (int)(((free1 - size) - free2) * 100 / size), + (int)((free1 - size) - free2), size); + + return 0; +} + splat_subsystem_t * splat_kmem_init(void) { @@ -1067,6 +1146,8 @@ splat_kmem_init(void) SPLAT_KMEM_TEST10_ID, splat_kmem_test10); SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST11_NAME, SPLAT_KMEM_TEST11_DESC, SPLAT_KMEM_TEST11_ID, splat_kmem_test11); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST12_NAME, SPLAT_KMEM_TEST12_DESC, + SPLAT_KMEM_TEST12_ID, splat_kmem_test12); return sub; } @@ -1075,6 +1156,7 @@ void splat_kmem_fini(splat_subsystem_t *sub) { ASSERT(sub); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST12_ID); SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST11_ID); SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST10_ID); SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST9_ID); diff --git a/spl_config.h.in b/spl_config.h.in index f2895054d..4d11efb3c 100644 --- a/spl_config.h.in +++ b/spl_config.h.in @@ -132,6 +132,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H +/* free/inactive/active page state is available */ +#undef HAVE_ZONE_STAT_ITEM_FIA + /* Name of package */ #undef PACKAGE