Linux VM integration / device special files

Support added to provide reasonable values for the global Solaris
VM variables: minfree, desfree, lotsfree, needfree.  These values
are set to the sum of their per-zone linux counterparts which
should be close enough for Solaris consumers.

When a non-GPL app links against the SPL we cannot use the udev
interfaces, which means none of the device special files are created.
Because of this I have added a poor man's udev which causes the SPL
to invoke an upcall and create the basic devices when a minor
is registered.  When a minor is unregistered we use the vnode
interface to unlink the special file.
This commit is contained in:
Brian Behlendorf
2009-02-04 15:15:41 -08:00
parent 31a033ecd4
commit 36b313dacf
13 changed files with 498 additions and 64 deletions
-3
View File
@@ -53,9 +53,6 @@ EXPORT_SYMBOL(hw_serial);
int p0 = 0;
EXPORT_SYMBOL(p0);
vmem_t *zio_alloc_arena = NULL;
EXPORT_SYMBOL(zio_alloc_arena);
int
highbit(unsigned long i)
{
+109
View File
@@ -32,6 +32,96 @@
#define DEBUG_SUBSYSTEM S_KMEM
/*
 * The minimum amount of memory, measured in pages, to be free at all
 * times on the system.  This is similar to Linux's zone->pages_min
 * multiplied by the number of zones and is sized based on that.
 * Starts at 0; spl_kmem_init_globals() adds the pages_min value of
 * every populated zone to it.
 */
pgcnt_t minfree = 0;
EXPORT_SYMBOL(minfree);
/*
 * The desired amount of memory, measured in pages, to be free at all
 * times on the system.  This is similar to Linux's zone->pages_low
 * multiplied by the number of zones and is sized based on that.
 * Assuming all zones are being used roughly equally, when we drop
 * below this threshold async page reclamation is triggered.
 * Starts at 0; spl_kmem_init_globals() adds the pages_low value of
 * every populated zone to it.
 */
pgcnt_t desfree = 0;
EXPORT_SYMBOL(desfree);
/*
 * When above this amount of memory, measured in pages, the system is
 * determined to have enough free memory.  This is similar to Linux's
 * zone->pages_high multiplied by the number of zones and is sized based
 * on that.  Assuming all zones are being used roughly equally, when
 * async page reclamation reaches this threshold it stops.
 * Starts at 0; spl_kmem_init_globals() adds the pages_high value of
 * every populated zone to it.
 */
pgcnt_t lotsfree = 0;
EXPORT_SYMBOL(lotsfree);
/* Unused, always 0 in this implementation. */
pgcnt_t needfree = 0;
EXPORT_SYMBOL(needfree);
/*
 * Remaining Solaris-style VM globals.  Each is defined, initialized to
 * 0 (or NULL for the arena pointers) and exported; none of the code
 * visible here assigns them a different value.
 * NOTE(review): presumably these exist only so Solaris consumers that
 * reference the symbols can link -- confirm against the callers.
 */
pgcnt_t swapfs_desfree = 0;
EXPORT_SYMBOL(swapfs_desfree);
pgcnt_t swapfs_minfree = 0;
EXPORT_SYMBOL(swapfs_minfree);
pgcnt_t swapfs_reserve = 0;
EXPORT_SYMBOL(swapfs_reserve);
pgcnt_t availrmem = 0;
EXPORT_SYMBOL(availrmem);
vmem_t *heap_arena = NULL;
EXPORT_SYMBOL(heap_arena);
vmem_t *zio_alloc_arena = NULL;
EXPORT_SYMBOL(zio_alloc_arena);
vmem_t *zio_arena = NULL;
EXPORT_SYMBOL(zio_arena);
#ifndef HAVE_FIRST_ONLINE_PGDAT
/*
 * Fallback used when HAVE_FIRST_ONLINE_PGDAT is not defined: return the
 * node data for first_online_node.
 */
struct pglist_data *
first_online_pgdat(void)
{
	struct pglist_data *first = NODE_DATA(first_online_node);

	return first;
}
#endif /* HAVE_FIRST_ONLINE_PGDAT */
#ifndef HAVE_NEXT_ONLINE_PGDAT
/*
 * Fallback used when HAVE_NEXT_ONLINE_PGDAT is not defined: return the
 * node data of the online node following pgdat's node, or NULL when
 * next_online_node() reports MAX_NUMNODES (no further online node).
 */
struct pglist_data *
next_online_pgdat(struct pglist_data *pgdat)
{
	int next_nid = next_online_node(pgdat->node_id);

	if (next_nid != MAX_NUMNODES)
		return NODE_DATA(next_nid);

	return NULL;
}
#endif /* HAVE_NEXT_ONLINE_PGDAT */
#ifndef HAVE_NEXT_ZONE
/*
 * Fallback used when HAVE_NEXT_ZONE is not defined: step to the zone
 * that follows 'zone'.
 *
 * While 'zone' is not the last entry of its node's node_zones array the
 * next array element is returned.  Otherwise the first zone of the next
 * online node is returned, or NULL when there is no further online node.
 */
struct zone *
next_zone(struct zone *zone)
{
	pg_data_t *node = zone->zone_pgdat;
	struct zone *last_in_node = node->node_zones + MAX_NR_ZONES - 1;

	/* Still inside the current node's zone array. */
	if (zone < last_in_node)
		return zone + 1;

	/* Current node exhausted, move on to the next online node. */
	node = next_online_pgdat(node);
	if (!node)
		return NULL;

	return node->node_zones;
}
#endif /* HAVE_NEXT_ZONE */
/*
* Memory allocation interfaces and debugging for basic kmem_*
* and vmem_* style memory allocation. When DEBUG_KMEM is enable
@@ -1601,6 +1691,24 @@ spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
#define spl_kmem_fini_tracking(list, lock)
#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
/*
 * Seed the Solaris-style VM thresholds from the Linux zone watermarks.
 *
 * The pages_min, pages_low and pages_high values of every populated
 * zone are added to minfree, desfree and lotsfree respectively.
 */
static void
spl_kmem_init_globals(void)
{
	struct zone *z;

	/*
	 * For now all zones are included; it may be wise to restrict
	 * this to the normal and highmem zones if we see problems.
	 */
	for_each_zone(z) {
		if (populated_zone(z)) {
			minfree += z->pages_min;
			desfree += z->pages_low;
			lotsfree += z->pages_high;
		}
	}
}
int
spl_kmem_init(void)
{
@@ -1609,6 +1717,7 @@ spl_kmem_init(void)
init_rwsem(&spl_kmem_cache_sem);
INIT_LIST_HEAD(&spl_kmem_cache_list);
spl_kmem_init_globals();
#ifdef HAVE_SET_SHRINKER
spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
+25
View File
@@ -140,6 +140,7 @@ __ddi_create_minor_node(dev_info_t *di, char *name, int spec_type,
ASSERT(cb_ops->cb_aread == NULL);
ASSERT(cb_ops->cb_awrite == NULL);
snprintf(di->di_name, DDI_MAX_NAME_LEN-1, "/dev/%s", name);
di->di_cdev = cdev;
di->di_flags = flags;
di->di_minor = minor_num;
@@ -281,6 +282,30 @@ __mod_install(struct modlinkage *modlp)
}
EXPORT_SYMBOL(__mod_install);
/*
 * Create a device special file by running /bin/mknod as a user mode
 * helper.
 *
 * name  - path of the special file, passed to mknod unchanged
 * type  - device type argument for mknod, passed unchanged
 * major - major number, formatted as decimal
 * minor - minor number, formatted as decimal
 *
 * Returns the value returned by call_usermodehelper().
 */
int
__mod_mknod(char *name, char *type, int major, int minor)
{
	char helper[] = "/bin/mknod";
	char major_buf[8];
	char minor_buf[8];
	char *helper_env[] = {
		"HOME=/",
		"TERM=linux",
		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
		NULL
	};
	char *helper_args[] = {
		helper, name, type, major_buf, minor_buf, NULL
	};

	/* The helper receives the device numbers as decimal strings. */
	snprintf(major_buf, sizeof(major_buf), "%d", major);
	snprintf(minor_buf, sizeof(minor_buf), "%d", minor);

	/*
	 * NOTE(review): the final argument (1) is the helper's wait mode;
	 * presumably it means "wait for the process to finish" -- confirm
	 * against the target kernel's call_usermodehelper() definition.
	 */
	return call_usermodehelper(helper, helper_args, helper_env, 1);
}
EXPORT_SYMBOL(__mod_mknod);
int
__mod_remove(struct modlinkage *modlp)
{
+126 -30
View File
@@ -60,6 +60,7 @@ struct proc_dir_entry *proc_spl_kstat = NULL;
#define CTL_SPL CTL_UNNUMBERED
#define CTL_SPL_DEBUG CTL_UNNUMBERED
#define CTL_SPL_VM CTL_UNNUMBERED
#define CTL_SPL_MUTEX CTL_UNNUMBERED
#define CTL_SPL_KMEM CTL_UNNUMBERED
#define CTL_SPL_KSTAT CTL_UNNUMBERED
@@ -85,6 +86,15 @@ struct proc_dir_entry *proc_spl_kstat = NULL;
#define CTL_CONSOLE_MIN_DELAY_CS CTL_UNNUMBERED /* Init delay skip messages */
#define CTL_CONSOLE_BACKOFF CTL_UNNUMBERED /* Delay increase factor */
#define CTL_VM_MINFREE CTL_UNNUMBERED /* Minimum free memory */
#define CTL_VM_DESFREE CTL_UNNUMBERED /* Desired free memory */
#define CTL_VM_LOTSFREE CTL_UNNUMBERED /* Lots of free memory */
#define CTL_VM_NEEDFREE CTL_UNNUMBERED /* Need free memory */
#define CTL_VM_SWAPFS_MINFREE CTL_UNNUMBERED /* Minimum swapfs memory */
#define CTL_VM_SWAPFS_DESFREE CTL_UNNUMBERED /* Desired swapfs memory */
#define CTL_VM_SWAPFS_RESERVE CTL_UNNUMBERED /* Reserved swapfs memory */
#define CTL_VM_AVAILRMEM CTL_UNNUMBERED /* Available reserved memory */
#ifdef DEBUG_KMEM
#define CTL_KMEM_KMEMUSED CTL_UNNUMBERED /* Alloc'd kmem bytes */
#define CTL_KMEM_KMEMMAX CTL_UNNUMBERED /* Max alloc'd by kmem bytes */
@@ -99,44 +109,56 @@ struct proc_dir_entry *proc_spl_kstat = NULL;
#else /* HAVE_CTL_UNNUMBERED */
#define CTL_SPL 0x87
#define CTL_SPL_DEBUG 0x88
#define CTL_SPL_MUTEX 0x89
#define CTL_SPL_KMEM 0x90
#define CTL_SPL_KSTAT 0x91
enum {
CTL_SPL = 0x87,
CTL_SPL_DEBUG = 0x88,
CTL_SPL_VM = 0x89,
CTL_SPL_MUTEX = 0x90,
CTL_SPL_KMEM = 0x91,
CTL_SPL_KSTAT = 0x92,
};
enum {
CTL_VERSION = 1, /* Version */
CTL_HOSTID, /* Host id reported by /usr/bin/hostid */
CTL_HW_SERIAL, /* Hardware serial number from hostid */
CTL_VERSION = 1, /* Version */
CTL_HOSTID, /* Host id reported by /usr/bin/hostid */
CTL_HW_SERIAL, /* Hardware serial number from hostid */
CTL_DEBUG_SUBSYS, /* Debug subsystem */
CTL_DEBUG_MASK, /* Debug mask */
CTL_DEBUG_PRINTK, /* Force all messages to console */
CTL_DEBUG_MB, /* Debug buffer size */
CTL_DEBUG_BINARY, /* Include binary data in buffer */
CTL_DEBUG_CATASTROPHE, /* Set if we have BUG'd or panic'd */
CTL_DEBUG_PANIC_ON_BUG, /* Set if we should panic on BUG */
CTL_DEBUG_PATH, /* Dump log location */
CTL_DEBUG_DUMP, /* Dump debug buffer to file */
CTL_DEBUG_FORCE_BUG, /* Hook to force a BUG */
CTL_DEBUG_STACK_SIZE, /* Max observed stack size */
CTL_DEBUG_SUBSYS, /* Debug subsystem */
CTL_DEBUG_MASK, /* Debug mask */
CTL_DEBUG_PRINTK, /* Force all messages to console */
CTL_DEBUG_MB, /* Debug buffer size */
CTL_DEBUG_BINARY, /* Include binary data in buffer */
CTL_DEBUG_CATASTROPHE, /* Set if we have BUG'd or panic'd */
CTL_DEBUG_PANIC_ON_BUG, /* Set if we should panic on BUG */
CTL_DEBUG_PATH, /* Dump log location */
CTL_DEBUG_DUMP, /* Dump debug buffer to file */
CTL_DEBUG_FORCE_BUG, /* Hook to force a BUG */
CTL_DEBUG_STACK_SIZE, /* Max observed stack size */
CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */
CTL_CONSOLE_MAX_DELAY_CS, /* Max delay at which we skip messages */
CTL_CONSOLE_MIN_DELAY_CS, /* Init delay at which we skip messages */
CTL_CONSOLE_BACKOFF, /* Delay increase factor */
CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */
CTL_CONSOLE_MAX_DELAY_CS, /* Max delay which we skip messages */
CTL_CONSOLE_MIN_DELAY_CS, /* Init delay which we skip messages */
CTL_CONSOLE_BACKOFF, /* Delay increase factor */
CTL_VM_MINFREE, /* Minimum free memory threshold */
CTL_VM_DESFREE, /* Desired free memory threshold */
CTL_VM_LOTSFREE, /* Lots of free memory threshold */
CTL_VM_NEEDFREE, /* Need free memory deficit */
CTL_VM_SWAPFS_MINFREE, /* Minimum swapfs memory */
CTL_VM_SWAPFS_DESFREE, /* Desired swapfs memory */
CTL_VM_SWAPFS_RESERVE, /* Reserved swapfs memory */
CTL_VM_AVAILRMEM, /* Available reserved memory */
#ifdef DEBUG_KMEM
CTL_KMEM_KMEMUSED, /* Alloc'd kmem bytes */
CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */
CTL_KMEM_VMEMUSED, /* Alloc'd vmem bytes */
CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */
CTL_KMEM_KMEMUSED, /* Alloc'd kmem bytes */
CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */
CTL_KMEM_VMEMUSED, /* Alloc'd vmem bytes */
CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */
#endif
CTL_MUTEX_STATS, /* Global mutex statistics */
CTL_MUTEX_STATS_PER, /* Per mutex statistics */
CTL_MUTEX_SPIN_MAX, /* Maximum mutex spin iterations */
CTL_MUTEX_STATS, /* Global mutex statistics */
CTL_MUTEX_STATS_PER, /* Per mutex statistics */
CTL_MUTEX_SPIN_MAX, /* Maximum mutex spin iterations */
};
#endif /* HAVE_CTL_UNNUMBERED */
@@ -769,6 +791,74 @@ static struct ctl_table spl_debug_table[] = {
{0},
};
/*
 * One entry of the "vm" sysctl table: publishes the variable 'var'
 * under the name 'name' through proc_dointvec with the given mode.
 *
 * NOTE(review): the variables below are defined as pgcnt_t while the
 * entries use sizeof(int) and proc_dointvec; confirm pgcnt_t has the
 * width of an int on every supported platform.
 */
#define SPL_VM_INT_ENTRY(id, name, var, perms)		\
	{						\
		.ctl_name	= id,			\
		.procname	= name,			\
		.data		= &var,			\
		.maxlen		= sizeof(int),		\
		.mode		= perms,		\
		.proc_handler	= &proc_dointvec,	\
	}

/*
 * Sysctl entries for the Solaris-style VM variables.  needfree and
 * availrmem are read-only (0444); all other entries are writable by
 * the owner (0644).  The table ends with a zeroed sentinel entry.
 */
static struct ctl_table spl_vm_table[] = {
	SPL_VM_INT_ENTRY(CTL_VM_MINFREE, "minfree", minfree, 0644),
	SPL_VM_INT_ENTRY(CTL_VM_DESFREE, "desfree", desfree, 0644),
	SPL_VM_INT_ENTRY(CTL_VM_LOTSFREE, "lotsfree", lotsfree, 0644),
	SPL_VM_INT_ENTRY(CTL_VM_NEEDFREE, "needfree", needfree, 0444),
	SPL_VM_INT_ENTRY(CTL_VM_SWAPFS_MINFREE, "swapfs_minfree",
	    swapfs_minfree, 0644),
	SPL_VM_INT_ENTRY(CTL_VM_SWAPFS_DESFREE, "swapfs_desfree",
	    swapfs_desfree, 0644),
	SPL_VM_INT_ENTRY(CTL_VM_SWAPFS_RESERVE, "swapfs_reserve",
	    swapfs_reserve, 0644),
	SPL_VM_INT_ENTRY(CTL_VM_AVAILRMEM, "availrmem", availrmem, 0444),
	{0},
};

#undef SPL_VM_INT_ENTRY
#ifdef DEBUG_MUTEX
static struct ctl_table spl_mutex_table[] = {
{
@@ -873,6 +963,12 @@ static struct ctl_table spl_table[] = {
.mode = 0555,
.child = spl_debug_table,
},
{
.ctl_name = CTL_SPL_VM,
.procname = "vm",
.mode = 0555,
.child = spl_vm_table,
},
#ifdef DEBUG_MUTEX
{
.ctl_name = CTL_SPL_MUTEX,