mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-14 04:00:31 +03:00
Update arc_available_memory() to check freemem
While Linux doesn't provide detailed information about the state of the VM, it does provide us with the total number of free pages. This information should be incorporated into the arc_available_memory() calculation rather than solely relying on a signal from direct reclaim. Conceptually this brings arc_available_memory() back in sync with illumos. It is also desirable that the target amount of free memory be tunable on a system. While the default values are expected to work well for most workloads, there may be cases where custom values are needed. The zfs_arc_sys_free module option was added for this purpose. zfs_arc_sys_free - The target number of bytes the ARC should leave as free memory on the system. This value can be checked in /proc/spl/kstat/zfs/arcstats, and setting this module option will override the default value. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #3637
This commit is contained in:
parent
6339c1b9dc
commit
11f552fa90
@ -566,6 +566,19 @@ log2(fraction of arc to reclaim)
|
|||||||
Default value: \fB5\fR.
|
Default value: \fB5\fR.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBzfs_arc_sys_free\fR (ulong)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
The target number of bytes the ARC should leave as free memory on the system.
|
||||||
|
Defaults to the larger of 1/64 of physical memory or 512K. Setting this
|
||||||
|
option to a non-zero value will override the default.
|
||||||
|
.sp
|
||||||
|
Default value: \fB0\fR.
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
|
@ -240,6 +240,7 @@ int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
|
|||||||
/*
|
/*
|
||||||
* These tunables are Linux specific
|
* These tunables are Linux specific
|
||||||
*/
|
*/
|
||||||
|
unsigned long zfs_arc_sys_free = 0;
|
||||||
int zfs_arc_memory_throttle_disable = 1;
|
int zfs_arc_memory_throttle_disable = 1;
|
||||||
int zfs_arc_min_prefetch_lifespan = 0;
|
int zfs_arc_min_prefetch_lifespan = 0;
|
||||||
int zfs_arc_p_aggressive_disable = 1;
|
int zfs_arc_p_aggressive_disable = 1;
|
||||||
@ -473,6 +474,8 @@ typedef struct arc_stats {
|
|||||||
kstat_named_t arcstat_meta_limit;
|
kstat_named_t arcstat_meta_limit;
|
||||||
kstat_named_t arcstat_meta_max;
|
kstat_named_t arcstat_meta_max;
|
||||||
kstat_named_t arcstat_meta_min;
|
kstat_named_t arcstat_meta_min;
|
||||||
|
kstat_named_t arcstat_need_free;
|
||||||
|
kstat_named_t arcstat_sys_free;
|
||||||
} arc_stats_t;
|
} arc_stats_t;
|
||||||
|
|
||||||
static arc_stats_t arc_stats = {
|
static arc_stats_t arc_stats = {
|
||||||
@ -564,7 +567,9 @@ static arc_stats_t arc_stats = {
|
|||||||
{ "arc_meta_used", KSTAT_DATA_UINT64 },
|
{ "arc_meta_used", KSTAT_DATA_UINT64 },
|
||||||
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
|
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
|
||||||
{ "arc_meta_max", KSTAT_DATA_UINT64 },
|
{ "arc_meta_max", KSTAT_DATA_UINT64 },
|
||||||
{ "arc_meta_min", KSTAT_DATA_UINT64 }
|
{ "arc_meta_min", KSTAT_DATA_UINT64 },
|
||||||
|
{ "arc_need_free", KSTAT_DATA_UINT64 },
|
||||||
|
{ "arc_sys_free", KSTAT_DATA_UINT64 }
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
|
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
|
||||||
@ -633,6 +638,8 @@ static arc_state_t *arc_l2c_only;
|
|||||||
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
|
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
|
||||||
#define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */
|
#define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */
|
||||||
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
|
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
|
||||||
|
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */
|
||||||
|
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */
|
||||||
|
|
||||||
#define L2ARC_IS_VALID_COMPRESS(_c_) \
|
#define L2ARC_IS_VALID_COMPRESS(_c_) \
|
||||||
((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY)
|
((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY)
|
||||||
@ -3222,12 +3229,6 @@ int64_t last_free_memory;
|
|||||||
free_memory_reason_t last_free_reason;
|
free_memory_reason_t last_free_reason;
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
#ifdef __linux__
|
|
||||||
/*
|
|
||||||
* expiration time for arc_no_grow set by direct memory reclaim.
|
|
||||||
*/
|
|
||||||
static clock_t arc_grow_time = 0;
|
|
||||||
#else
|
|
||||||
/*
|
/*
|
||||||
* Additional reserve of pages for pp_reserve.
|
* Additional reserve of pages for pp_reserve.
|
||||||
*/
|
*/
|
||||||
@ -3237,7 +3238,6 @@ int64_t arc_pages_pp_reserve = 64;
|
|||||||
* Additional reserve of pages for swapfs.
|
* Additional reserve of pages for swapfs.
|
||||||
*/
|
*/
|
||||||
int64_t arc_swapfs_reserve = 64;
|
int64_t arc_swapfs_reserve = 64;
|
||||||
#endif
|
|
||||||
#endif /* _KERNEL */
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -3250,26 +3250,14 @@ arc_available_memory(void)
|
|||||||
{
|
{
|
||||||
int64_t lowest = INT64_MAX;
|
int64_t lowest = INT64_MAX;
|
||||||
free_memory_reason_t r = FMR_UNKNOWN;
|
free_memory_reason_t r = FMR_UNKNOWN;
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
#ifdef __linux__
|
|
||||||
/*
|
|
||||||
* Under Linux we are not allowed to directly interrogate the global
|
|
||||||
* memory state. Instead rely on observing that direct reclaim has
|
|
||||||
* recently occurred therefore the system must be low on memory. The
|
|
||||||
* exact values returned are not critical but should be small.
|
|
||||||
*/
|
|
||||||
if (ddi_time_after_eq(ddi_get_lbolt(), arc_grow_time))
|
|
||||||
lowest = PAGE_SIZE;
|
|
||||||
else
|
|
||||||
lowest = -PAGE_SIZE;
|
|
||||||
#else
|
|
||||||
int64_t n;
|
int64_t n;
|
||||||
|
#ifdef __linux__
|
||||||
|
pgcnt_t needfree = btop(arc_need_free);
|
||||||
|
pgcnt_t lotsfree = btop(arc_sys_free);
|
||||||
|
pgcnt_t desfree = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* Platforms like illumos have greater visibility in to the memory
|
|
||||||
* subsystem and can return a more detailed analysis of memory.
|
|
||||||
*/
|
|
||||||
if (needfree > 0) {
|
if (needfree > 0) {
|
||||||
n = PAGESIZE * (-needfree);
|
n = PAGESIZE * (-needfree);
|
||||||
if (n < lowest) {
|
if (n < lowest) {
|
||||||
@ -3291,6 +3279,7 @@ arc_available_memory(void)
|
|||||||
r = FMR_LOTSFREE;
|
r = FMR_LOTSFREE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef __linux__
|
||||||
/*
|
/*
|
||||||
* check to make sure that swapfs has enough space so that anon
|
* check to make sure that swapfs has enough space so that anon
|
||||||
* reservations can still succeed. anon_resvmem() checks that the
|
* reservations can still succeed. anon_resvmem() checks that the
|
||||||
@ -3319,6 +3308,7 @@ arc_available_memory(void)
|
|||||||
lowest = n;
|
lowest = n;
|
||||||
r = FMR_PAGES_PP_MAXIMUM;
|
r = FMR_PAGES_PP_MAXIMUM;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(__i386)
|
#if defined(__i386)
|
||||||
/*
|
/*
|
||||||
@ -3357,12 +3347,11 @@ arc_available_memory(void)
|
|||||||
r = FMR_ZIO_ARENA;
|
r = FMR_ZIO_ARENA;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* __linux__ */
|
#else /* _KERNEL */
|
||||||
#else
|
|
||||||
/* Every 100 calls, free a small amount */
|
/* Every 100 calls, free a small amount */
|
||||||
if (spa_get_random(100) == 0)
|
if (spa_get_random(100) == 0)
|
||||||
lowest = -1024;
|
lowest = -1024;
|
||||||
#endif
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
last_free_memory = lowest;
|
last_free_memory = lowest;
|
||||||
last_free_reason = r;
|
last_free_reason = r;
|
||||||
@ -3480,7 +3469,7 @@ arc_reclaim_thread(void)
|
|||||||
to_free = (arc_c >> arc_shrink_shift) - free_memory;
|
to_free = (arc_c >> arc_shrink_shift) - free_memory;
|
||||||
if (to_free > 0) {
|
if (to_free > 0) {
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
to_free = MAX(to_free, ptob(needfree));
|
to_free = MAX(to_free, arc_need_free);
|
||||||
#endif
|
#endif
|
||||||
arc_shrink(to_free);
|
arc_shrink(to_free);
|
||||||
}
|
}
|
||||||
@ -3507,9 +3496,11 @@ arc_reclaim_thread(void)
|
|||||||
/*
|
/*
|
||||||
* We're either no longer overflowing, or we
|
* We're either no longer overflowing, or we
|
||||||
* can't evict anything more, so we should wake
|
* can't evict anything more, so we should wake
|
||||||
* up any threads before we go to sleep.
|
* up any threads before we go to sleep and clear
|
||||||
|
* arc_need_free since nothing more can be done.
|
||||||
*/
|
*/
|
||||||
cv_broadcast(&arc_reclaim_waiters_cv);
|
cv_broadcast(&arc_reclaim_waiters_cv);
|
||||||
|
arc_need_free = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Block until signaled, or after one second (we
|
* Block until signaled, or after one second (we
|
||||||
@ -3713,7 +3704,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
|
|||||||
ARCSTAT_BUMP(arcstat_memory_indirect_count);
|
ARCSTAT_BUMP(arcstat_memory_indirect_count);
|
||||||
} else {
|
} else {
|
||||||
arc_no_grow = B_TRUE;
|
arc_no_grow = B_TRUE;
|
||||||
arc_grow_time = ddi_get_lbolt() + (zfs_arc_grow_retry * hz);
|
arc_need_free = ptob(sc->nr_to_scan);
|
||||||
ARCSTAT_BUMP(arcstat_memory_direct_count);
|
ARCSTAT_BUMP(arcstat_memory_direct_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5288,6 +5279,10 @@ arc_tuning_update(void)
|
|||||||
/* Valid range: 1 - N ticks */
|
/* Valid range: 1 - N ticks */
|
||||||
if (zfs_arc_min_prefetch_lifespan)
|
if (zfs_arc_min_prefetch_lifespan)
|
||||||
arc_min_prefetch_lifespan = zfs_arc_min_prefetch_lifespan;
|
arc_min_prefetch_lifespan = zfs_arc_min_prefetch_lifespan;
|
||||||
|
|
||||||
|
/* Valid range: 0 - <all physical memory> */
|
||||||
|
if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free))
|
||||||
|
arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), ptob(physmem));
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -5329,6 +5324,10 @@ arc_init(void)
|
|||||||
* swapping out pages when it is preferable to shrink the arc.
|
* swapping out pages when it is preferable to shrink the arc.
|
||||||
*/
|
*/
|
||||||
spl_register_shrinker(&arc_shrinker);
|
spl_register_shrinker(&arc_shrinker);
|
||||||
|
|
||||||
|
/* Set to 1/64 of all memory or a minimum of 512K */
|
||||||
|
arc_sys_free = MAX(ptob(physmem / 64), (512 * 1024));
|
||||||
|
arc_need_free = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Set min cache to allow safe operation of arc_adapt() */
|
/* Set min cache to allow safe operation of arc_adapt() */
|
||||||
@ -7064,4 +7063,7 @@ MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup");
|
|||||||
module_param(l2arc_norw, int, 0644);
|
module_param(l2arc_norw, int, 0644);
|
||||||
MODULE_PARM_DESC(l2arc_norw, "No reads during writes");
|
MODULE_PARM_DESC(l2arc_norw, "No reads during writes");
|
||||||
|
|
||||||
|
module_param(zfs_arc_sys_free, ulong, 0644);
|
||||||
|
MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes");
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user