Linux VM integration / device special files

Support added to provide reasonable values for the global Solaris
VM variables: minfree, desfree, lotsfree, needfree.  These values
are set to the sum of their per-zone Linux counterparts which
should be close enough for Solaris consumers.

When a non-GPL app links against the SPL we cannot use the udev
interfaces, which means none of the device special files are created.
Because of this I have added a poor man's udev which causes the SPL
to invoke an upcall and create the basic devices when a minor
is registered.  When a minor is unregistered we use the vnode
interface to unlink the special file.
This commit is contained in:
Brian Behlendorf
2009-02-04 15:15:41 -08:00
parent 31a033ecd4
commit 36b313dacf
13 changed files with 498 additions and 64 deletions
+2
View File
@@ -1,4 +1,6 @@
#ifndef _SPL_DNLC_H
#define _SPL_DNLC_H
#define dnlc_reduce_cache(percent) ((void)0)
#endif /* SPL_DNLC_H */
+1
View File
@@ -45,6 +45,7 @@ extern "C" {
#include <asm/atomic_compat.h>
#include <sys/types.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
/*
* Memory allocation interfaces
+31 -3
View File
@@ -32,12 +32,15 @@
#include <sys/sunldi.h>
#include <sys/mutex.h>
#include <sys/u8_textprep.h>
#include <sys/vnode.h>
#include <linux/kdev_t.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/list.h>
#include <spl-device.h>
#define DDI_MAX_NAME_LEN 32
typedef int ddi_devid_t;
typedef enum {
@@ -80,6 +83,7 @@ typedef struct pollhead {
typedef struct dev_info {
kmutex_t di_lock;
char di_name[DDI_MAX_NAME_LEN];
struct dev_ops *di_ops;
struct cdev *di_cdev;
spl_class *di_class;
@@ -202,6 +206,7 @@ extern void __ddi_remove_minor_node(dev_info_t *dip, char *name);
extern int ddi_quiesce_not_needed(dev_info_t *dip);
extern int __mod_install(struct modlinkage *modlp);
extern int __mod_remove(struct modlinkage *modlp);
extern int __mod_mknod(char *name, char *type, int major, int minor);
extern int ddi_strtoul(const char *, char **, int, unsigned long *);
extern int ddi_strtol(const char *, char **, int, long *);
@@ -226,7 +231,16 @@ ddi_remove_minor_node(dev_info_t *di, char *name)
di->di_class = NULL;
di->di_dev = 0;
}
#endif
#else
/* When we do not have access to the GPL-only device interfaces we
* are forced to do something crude. We unlink the special device
* file in /dev/ ourselves from within the kernel. On the upside we
* are already providing this functionality for Solaris, and it is
* easy to leverage the Solaris API to perform the unlink. */
if (strlen(di->di_name) > 0)
vn_remove(di->di_name, UIO_SYSSPACE, RMFILE);
#endif /* HAVE_GPL_ONLY_SYMBOLS */
__ddi_remove_minor_node(di, name);
}
@@ -254,14 +268,28 @@ ddi_create_minor_node(dev_info_t *di, char *name, int spec_type,
di->di_class = NULL;
ddi_remove_minor_node(di, name);
CERROR("Error creating %s class, %d\n", name, rc);
RETURN(DDI_FAILURE);
return DDI_FAILURE;
}
/* Do not append a 0 to devices with minor nums of 0 */
di->di_device = spl_device_create(di->di_class, NULL, di->di_dev, NULL,
(di->di_minor == 0) ? "%s" : "%s%d",
name, di->di_minor);
#endif
#else
/* When we do not have access to the GPL-only device interfaces we
* are forced to do something horrible. We use a user mode helper to
* create the special device file in /dev/. By further extending the
* Solaris vnode implementation we could potentially do a vn_create()
* from within the kernel but that's still a hack. */
if (name) {
rc = __mod_mknod(di->di_name, "c", di->di_major, di->di_minor);
if (rc) {
CERROR("Error mknod %s, %d\n", di->di_name, rc);
ddi_remove_minor_node(di, name);
}
}
#endif /* HAVE_GPL_ONLY_SYMBOLS */
return rc;
}
+3 -2
View File
@@ -67,10 +67,13 @@ extern "C" {
#define DEV_BSIZE 512
#define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */
#define curproc current
#define proc_pageout NULL
#define max_ncpus 64
#define CPU_SEQID smp_processor_id() /* I think... */
#define _NOTE(x)
#define RLIM64_INFINITY RLIM_INFINITY
/* 0..MAX_PRIO-1: Process priority
@@ -135,8 +138,6 @@ extern int p0;
/* Missing misc functions */
extern int highbit(unsigned long i);
extern int ddi_strtoul(const char *str, char **nptr,
int base, unsigned long *result);
#define makedevice(maj,min) makedev(maj,min)
+36 -26
View File
@@ -27,19 +27,50 @@
#ifndef _SPL_VMSYSTM_H
#define _SPL_VMSYSTM_H
#include <linux/mmzone.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <sys/types.h>
#include <asm/uaccess.h>
extern vmem_t *zio_alloc_arena; /* arena for zio caches */
/* These values are loosely coupled with the VM page reclaim.
* Linux uses its own heuristics to trigger page reclamation, and
* because those interfaces are difficult to interface with, these
* values should only be considered as a rough guide to the system
* memory state and not as direct evidence that page reclamation
* is or is not currently in progress.
*/
#define ptob(pages) (pages * PAGE_SIZE)
#define membar_producer() smp_wmb()
#define physmem num_physpages
#define freemem nr_free_pages()
#define minfree 0
#define needfree 0 /* # of needed pages */
#define ptob(pages) (pages * PAGE_SIZE)
#define membar_producer() smp_wmb()
extern pgcnt_t minfree; /* Sum of zone->pages_min */
extern pgcnt_t desfree; /* Sum of zone->pages_low */
extern pgcnt_t lotsfree; /* Sum of zone->pages_high */
extern pgcnt_t needfree; /* Always 0 */
extern pgcnt_t swapfs_minfree;
extern pgcnt_t swapfs_desfree;
extern pgcnt_t swapfs_reserve;
extern pgcnt_t availrmem;
extern vmem_t *heap_arena; /* primary kernel heap arena */
extern vmem_t *zio_alloc_arena; /* arena for zio caches */
extern vmem_t *zio_arena; /* arena for allocating zio memory */
#define VMEM_ALLOC 0x01
#define VMEM_FREE 0x02
/*
 * Report the size of a vmem arena.  Arenas are not supported here, so
 * the only accepted arena is NULL and the reported size is always 0.
 *
 * vmp      - arena to query; asserted to be NULL
 * typemask - asserted to include VMEM_ALLOC and/or VMEM_FREE
 */
static __inline__ size_t
vmem_size(vmem_t *vmp, int typemask)
{
	ASSERT(vmp == NULL);
	ASSERT(typemask & (VMEM_ALLOC | VMEM_FREE));

	/* No arena accounting exists, so there is nothing to report. */
	return (0);
}
#define xcopyin(from, to, size) copy_from_user(to, from, size)
#define xcopyout(from, to, size) copy_to_user(to, from, size)
@@ -82,25 +113,4 @@ copyinstr(const void *from, void *to, size_t len, size_t *done)
return 0;
}
#if 0
/* The average number of free pages over the last 5 seconds */
#define avefree 0
/* The average number of free pages over the last 30 seconds */
#define avefree30 0
/* A guess as to how much memory has been promised to
* processes but not yet allocated */
#define deficit 0
/* A boolean that controls the setting of deficit */
#define desperate
/* When free memory is above this limit, no paging or swapping is done */
#define lotsfree 0
/* When free memory is above this limit, swapping is not performed */
#define desfree 0
#endif
#endif /* SPL_VMSYSTM_H */