mirror_zfs/module/spl/spl-proc.c
Brian Behlendorf c3eabc75b1 Refactor generic memory allocation interfaces
This patch achieves the following goals:

1. It replaces the preprocessor kmem flag to gfp flag mapping with
   proper translation logic. This eliminates the potential for
   surprises that were previously possible where kmem flags were
   mapped to gfp flags.

2. It maps vmem_alloc() allocations to kmem_alloc() for allocations
   sized less than or equal to the newly-added spl_kmem_alloc_max
   parameter.  This ensures that small allocations will not contend
   on a single global lock, large allocations can still be handled,
   and potentially limited virtual address space will not be squandered.
   This behavior is entirely different than under Illumos due to
   different memory management strategies employed by the respective
   kernels.  However, this functionally provides the semantics required.

3. The --disable-debug-kmem, --enable-debug-kmem (default), and
   --enable-debug-kmem-tracking allocators have been unified in to
   a single spl_kmem_alloc_impl() allocation function.  This was
   done to simplify the code and make it more maintainable.

4. Improve portability by exposing an implementation of the memory
   allocations functions that can be safely used in the same way
   they are used on Illumos.   Specifically, callers may safely
   use KM_SLEEP in contexts which perform filesystem IO.  This
   allows us to eliminate an entire class of Linux specific changes
   which were previously required to avoid deadlocking the system.

This change will be largely transparent to existing callers but there
are a few caveats:

1. Because the headers were refactored and extraneous includes removed
   callers may find they need to explicitly add additional #includes.
   In particular, kmem_cache.h must now be explicitly includes to
   access the SPL's kmem cache implementation.  This behavior is
   different from Illumos but it was done to avoid always masking
   the Linux slab functions when kmem.h is included.

2. Callers, like Lustre, which made assumptions about the definitions
   of KM_SLEEP, KM_NOSLEEP, and KM_PUSHPAGE will need to be updated.
   Other callers such as ZFS which did not will not require changes.

3. KM_PUSHPAGE is no longer overloaded to imply GFP_NOIO.  It retains
   its original meaning of allowing allocations to access reserved
   memory.  KM_PUSHPAGE callers can be converted back to KM_SLEEP.

4. The KM_NODEBUG flags has been retired and the default warning
   threshold increased to 32k.

5. The kmem_virt() functions has been removed.  For callers which
   need to distinguish between a physical and virtual address use
   is_vmalloc_addr().

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2015-01-16 13:55:09 -08:00

535 lines
15 KiB
C

/*****************************************************************************\
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Brian Behlendorf <behlendorf1@llnl.gov>.
* UCRL-CODE-235197
*
* This file is part of the SPL, Solaris Porting Layer.
* For details, see <http://zfsonlinux.org/>.
*
* The SPL is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The SPL is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*****************************************************************************
* Solaris Porting Layer (SPL) Proc Implementation.
\*****************************************************************************/
#include <sys/systeminfo.h>
#include <sys/kstat.h>
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/vmem.h>
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/seq_file.h>
#include <linux/proc_compat.h>
#include <linux/uaccess.h>
#include <linux/version.h>
#if defined(CONSTIFY_PLUGIN) && LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
typedef struct ctl_table __no_const spl_ctl_table;
#else
typedef struct ctl_table spl_ctl_table;
#endif
#ifdef DEBUG_KMEM
static unsigned long table_min = 0;
static unsigned long table_max = ~0;
#endif
static struct ctl_table_header *spl_header = NULL;
static struct proc_dir_entry *proc_spl = NULL;
#ifdef DEBUG_KMEM
static struct proc_dir_entry *proc_spl_kmem = NULL;
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
#endif /* DEBUG_KMEM */
struct proc_dir_entry *proc_spl_kstat = NULL;
static int
proc_copyin_string(char *kbuffer, int kbuffer_size,
const char *ubuffer, int ubuffer_size)
{
int size;
if (ubuffer_size > kbuffer_size)
return -EOVERFLOW;
if (copy_from_user((void *)kbuffer, (void *)ubuffer, ubuffer_size))
return -EFAULT;
/* strip trailing whitespace */
size = strnlen(kbuffer, ubuffer_size);
while (size-- >= 0)
if (!isspace(kbuffer[size]))
break;
/* empty string */
if (size < 0)
return -EINVAL;
/* no space to terminate */
if (size == kbuffer_size)
return -EOVERFLOW;
kbuffer[size + 1] = 0;
return 0;
}
static int
proc_copyout_string(char *ubuffer, int ubuffer_size,
const char *kbuffer, char *append)
{
/* NB if 'append' != NULL, it's a single character to append to the
* copied out string - usually "\n", for /proc entries and
* (i.e. a terminating zero byte) for sysctl entries
*/
int size = MIN(strlen(kbuffer), ubuffer_size);
if (copy_to_user(ubuffer, kbuffer, size))
return -EFAULT;
if (append != NULL && size < ubuffer_size) {
if (copy_to_user(ubuffer + size, append, 1))
return -EFAULT;
size++;
}
return size;
}
#ifdef DEBUG_KMEM
static int
proc_domemused(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int rc = 0;
unsigned long min = 0, max = ~0, val;
spl_ctl_table dummy = *table;
dummy.data = &val;
dummy.proc_handler = &proc_dointvec;
dummy.extra1 = &min;
dummy.extra2 = &max;
if (write) {
*ppos += *lenp;
} else {
# ifdef HAVE_ATOMIC64_T
val = atomic64_read((atomic64_t *)table->data);
# else
val = atomic_read((atomic_t *)table->data);
# endif /* HAVE_ATOMIC64_T */
rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
}
return (rc);
}
static int
proc_doslab(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int rc = 0;
unsigned long min = 0, max = ~0, val = 0, mask;
spl_ctl_table dummy = *table;
spl_kmem_cache_t *skc;
dummy.data = &val;
dummy.proc_handler = &proc_dointvec;
dummy.extra1 = &min;
dummy.extra2 = &max;
if (write) {
*ppos += *lenp;
} else {
down_read(&spl_kmem_cache_sem);
mask = (unsigned long)table->data;
list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
/* Only use slabs of the correct kmem/vmem type */
if (!(skc->skc_flags & mask))
continue;
/* Sum the specified field for selected slabs */
switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
case KMC_TOTAL:
val += skc->skc_slab_size * skc->skc_slab_total;
break;
case KMC_ALLOC:
val += skc->skc_obj_size * skc->skc_obj_alloc;
break;
case KMC_MAX:
val += skc->skc_obj_size * skc->skc_obj_max;
break;
}
}
up_read(&spl_kmem_cache_sem);
rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
}
return (rc);
}
#endif /* DEBUG_KMEM */
static int
proc_dohostid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int len, rc = 0;
char *end, str[32];
if (write) {
/* We can't use proc_doulongvec_minmax() in the write
* case here because hostid while a hex value has no
* leading 0x which confuses the helper function. */
rc = proc_copyin_string(str, sizeof(str), buffer, *lenp);
if (rc < 0)
return (rc);
spl_hostid = simple_strtoul(str, &end, 16);
if (str == end)
return (-EINVAL);
} else {
len = snprintf(str, sizeof(str), "%lx", spl_hostid);
if (*ppos >= len)
rc = 0;
else
rc = proc_copyout_string(buffer,*lenp,str+*ppos,"\n");
if (rc >= 0) {
*lenp = rc;
*ppos += rc;
}
}
return (rc);
}
#ifdef DEBUG_KMEM
static void
slab_seq_show_headers(struct seq_file *f)
{
seq_printf(f,
"--------------------- cache ----------"
"--------------------------------------------- "
"----- slab ------ "
"---- object ----- "
"--- emergency ---\n");
seq_printf(f,
"name "
" flags size alloc slabsize objsize "
"total alloc max "
"total alloc max "
"dlock alloc max\n");
}
static int
slab_seq_show(struct seq_file *f, void *p)
{
spl_kmem_cache_t *skc = p;
ASSERT(skc->skc_magic == SKC_MAGIC);
/*
* Backed by Linux slab see /proc/slabinfo.
*/
if (skc->skc_flags & KMC_SLAB)
return (0);
spin_lock(&skc->skc_lock);
seq_printf(f, "%-36s ", skc->skc_name);
seq_printf(f, "0x%05lx %9lu %9lu %8u %8u "
"%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n",
(long unsigned)skc->skc_flags,
(long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
(long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
(unsigned)skc->skc_slab_size,
(unsigned)skc->skc_obj_size,
(long unsigned)skc->skc_slab_total,
(long unsigned)skc->skc_slab_alloc,
(long unsigned)skc->skc_slab_max,
(long unsigned)skc->skc_obj_total,
(long unsigned)skc->skc_obj_alloc,
(long unsigned)skc->skc_obj_max,
(long unsigned)skc->skc_obj_deadlock,
(long unsigned)skc->skc_obj_emergency,
(long unsigned)skc->skc_obj_emergency_max);
spin_unlock(&skc->skc_lock);
return 0;
}
static void *
slab_seq_start(struct seq_file *f, loff_t *pos)
{
struct list_head *p;
loff_t n = *pos;
down_read(&spl_kmem_cache_sem);
if (!n)
slab_seq_show_headers(f);
p = spl_kmem_cache_list.next;
while (n--) {
p = p->next;
if (p == &spl_kmem_cache_list)
return (NULL);
}
return (list_entry(p, spl_kmem_cache_t, skc_list));
}
static void *
slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
{
spl_kmem_cache_t *skc = p;
++*pos;
return ((skc->skc_list.next == &spl_kmem_cache_list) ?
NULL : list_entry(skc->skc_list.next,spl_kmem_cache_t,skc_list));
}
static void
slab_seq_stop(struct seq_file *f, void *v)
{
up_read(&spl_kmem_cache_sem);
}
static struct seq_operations slab_seq_ops = {
.show = slab_seq_show,
.start = slab_seq_start,
.next = slab_seq_next,
.stop = slab_seq_stop,
};
static int
proc_slab_open(struct inode *inode, struct file *filp)
{
return seq_open(filp, &slab_seq_ops);
}
static struct file_operations proc_slab_operations = {
.open = proc_slab_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#endif /* DEBUG_KMEM */
#ifdef DEBUG_KMEM
static struct ctl_table spl_kmem_table[] = {
{
.procname = "kmem_used",
.data = &kmem_alloc_used,
# ifdef HAVE_ATOMIC64_T
.maxlen = sizeof(atomic64_t),
# else
.maxlen = sizeof(atomic_t),
# endif /* HAVE_ATOMIC64_T */
.mode = 0444,
.proc_handler = &proc_domemused,
},
{
.procname = "kmem_max",
.data = &kmem_alloc_max,
.maxlen = sizeof(unsigned long),
.extra1 = &table_min,
.extra2 = &table_max,
.mode = 0444,
.proc_handler = &proc_doulongvec_minmax,
},
{
.procname = "slab_kmem_total",
.data = (void *)(KMC_KMEM | KMC_TOTAL),
.maxlen = sizeof(unsigned long),
.extra1 = &table_min,
.extra2 = &table_max,
.mode = 0444,
.proc_handler = &proc_doslab,
},
{
.procname = "slab_kmem_alloc",
.data = (void *)(KMC_KMEM | KMC_ALLOC),
.maxlen = sizeof(unsigned long),
.extra1 = &table_min,
.extra2 = &table_max,
.mode = 0444,
.proc_handler = &proc_doslab,
},
{
.procname = "slab_kmem_max",
.data = (void *)(KMC_KMEM | KMC_MAX),
.maxlen = sizeof(unsigned long),
.extra1 = &table_min,
.extra2 = &table_max,
.mode = 0444,
.proc_handler = &proc_doslab,
},
{
.procname = "slab_vmem_total",
.data = (void *)(KMC_VMEM | KMC_TOTAL),
.maxlen = sizeof(unsigned long),
.extra1 = &table_min,
.extra2 = &table_max,
.mode = 0444,
.proc_handler = &proc_doslab,
},
{
.procname = "slab_vmem_alloc",
.data = (void *)(KMC_VMEM | KMC_ALLOC),
.maxlen = sizeof(unsigned long),
.extra1 = &table_min,
.extra2 = &table_max,
.mode = 0444,
.proc_handler = &proc_doslab,
},
{
.procname = "slab_vmem_max",
.data = (void *)(KMC_VMEM | KMC_MAX),
.maxlen = sizeof(unsigned long),
.extra1 = &table_min,
.extra2 = &table_max,
.mode = 0444,
.proc_handler = &proc_doslab,
},
{0},
};
#endif /* DEBUG_KMEM */
static struct ctl_table spl_kstat_table[] = {
{0},
};
static struct ctl_table spl_table[] = {
/* NB No .strategy entries have been provided since
* sysctl(8) prefers to go via /proc for portability.
*/
{
.procname = "version",
.data = spl_version,
.maxlen = sizeof(spl_version),
.mode = 0444,
.proc_handler = &proc_dostring,
},
{
.procname = "hostid",
.data = &spl_hostid,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = &proc_dohostid,
},
#ifdef DEBUG_KMEM
{
.procname = "kmem",
.mode = 0555,
.child = spl_kmem_table,
},
#endif
{
.procname = "kstat",
.mode = 0555,
.child = spl_kstat_table,
},
{ 0 },
};
static struct ctl_table spl_dir[] = {
{
.procname = "spl",
.mode = 0555,
.child = spl_table,
},
{ 0 }
};
static struct ctl_table spl_root[] = {
{
#ifdef HAVE_CTL_NAME
.ctl_name = CTL_KERN,
#endif
.procname = "kernel",
.mode = 0555,
.child = spl_dir,
},
{ 0 }
};
int
spl_proc_init(void)
{
int rc = 0;
spl_header = register_sysctl_table(spl_root);
if (spl_header == NULL)
return (-EUNATCH);
proc_spl = proc_mkdir("spl", NULL);
if (proc_spl == NULL) {
rc = -EUNATCH;
goto out;
}
#ifdef DEBUG_KMEM
proc_spl_kmem = proc_mkdir("kmem", proc_spl);
if (proc_spl_kmem == NULL) {
rc = -EUNATCH;
goto out;
}
proc_spl_kmem_slab = proc_create_data("slab", 0444,
proc_spl_kmem, &proc_slab_operations, NULL);
if (proc_spl_kmem_slab == NULL) {
rc = -EUNATCH;
goto out;
}
#endif /* DEBUG_KMEM */
proc_spl_kstat = proc_mkdir("kstat", proc_spl);
if (proc_spl_kstat == NULL) {
rc = -EUNATCH;
goto out;
}
out:
if (rc) {
remove_proc_entry("kstat", proc_spl);
#ifdef DEBUG_KMEM
remove_proc_entry("slab", proc_spl_kmem);
remove_proc_entry("kmem", proc_spl);
#endif
remove_proc_entry("spl", NULL);
unregister_sysctl_table(spl_header);
}
return (rc);
}
void
spl_proc_fini(void)
{
remove_proc_entry("kstat", proc_spl);
#ifdef DEBUG_KMEM
remove_proc_entry("slab", proc_spl_kmem);
remove_proc_entry("kmem", proc_spl);
#endif
remove_proc_entry("spl", NULL);
ASSERT(spl_header != NULL);
unregister_sysctl_table(spl_header);
}