mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-12 19:20:28 +03:00
Add direct+indirect ARC reclaim
Under OpenSolaris all memory reclaim is done asynchronously. Under Linux memory reclaim is done asynchronously _and_ synchronously. When a process allocates memory with GFP_KERNEL it explicitly allows the kernel to do reclaim on its behalf to satisfy the allocation. If that GFP_KERNEL allocation fails the kernel may take more drastic measures to reclaim the memory such as killing user space processes. This was observed to happen with ZFS because the ARC could consume a large fraction of the system memory but no synchronous reclaim could be performed on it. The result was that GFP_KERNEL allocations could fail, resulting in OOM events, and only moments later the arc_reclaim thread would free unused memory from the ARC. This change leaves the arc_thread in place to manage the fundamental ARC behavior. But it adds a synchronous (direct) reclaim path for the ARC which can be called when memory is badly needed. It also adds an asynchronous (indirect) reclaim path which is called much more frequently to prune the ARC slab caches.
This commit is contained in:
parent
1834f2d8b7
commit
7cb67b45f3
@ -282,6 +282,8 @@ typedef struct arc_stats {
|
|||||||
kstat_named_t arcstat_l2_size;
|
kstat_named_t arcstat_l2_size;
|
||||||
kstat_named_t arcstat_l2_hdr_size;
|
kstat_named_t arcstat_l2_hdr_size;
|
||||||
kstat_named_t arcstat_memory_throttle_count;
|
kstat_named_t arcstat_memory_throttle_count;
|
||||||
|
kstat_named_t arcstat_memory_direct_count;
|
||||||
|
kstat_named_t arcstat_memory_indirect_count;
|
||||||
kstat_named_t arcstat_no_grow;
|
kstat_named_t arcstat_no_grow;
|
||||||
kstat_named_t arcstat_tempreserve;
|
kstat_named_t arcstat_tempreserve;
|
||||||
kstat_named_t arcstat_loaned_bytes;
|
kstat_named_t arcstat_loaned_bytes;
|
||||||
@ -344,6 +346,8 @@ static arc_stats_t arc_stats = {
|
|||||||
{ "l2_size", KSTAT_DATA_UINT64 },
|
{ "l2_size", KSTAT_DATA_UINT64 },
|
||||||
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
|
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
|
||||||
{ "memory_throttle_count", KSTAT_DATA_UINT64 },
|
{ "memory_throttle_count", KSTAT_DATA_UINT64 },
|
||||||
|
{ "memory_direct_count", KSTAT_DATA_UINT64 },
|
||||||
|
{ "memory_indirect_count", KSTAT_DATA_UINT64 },
|
||||||
{ "arc_no_grow", KSTAT_DATA_UINT64 },
|
{ "arc_no_grow", KSTAT_DATA_UINT64 },
|
||||||
{ "arc_tempreserve", KSTAT_DATA_UINT64 },
|
{ "arc_tempreserve", KSTAT_DATA_UINT64 },
|
||||||
{ "arc_loaned_bytes", KSTAT_DATA_UINT64 },
|
{ "arc_loaned_bytes", KSTAT_DATA_UINT64 },
|
||||||
@ -2171,6 +2175,51 @@ arc_reclaim_thread(void)
|
|||||||
thread_exit();
|
thread_exit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef _KERNEL
|
||||||
|
/*
|
||||||
|
* Under Linux the arc shrinker may be called for synchronous (direct)
|
||||||
|
* reclaim, or asynchronous (indirect) reclaim. When called by kswapd
|
||||||
|
* for indirect reclaim we take a conservative approach and just reap
|
||||||
|
* free slabs from the ARC caches. If this proves to be insufficient
|
||||||
|
* direct reclaim will be triggered. In direct reclaim a more aggressive
|
||||||
|
* strategy is used, data is evicted from the ARC and free slabs reaped.
|
||||||
|
*/
|
||||||
|
SPL_SHRINKER_CALLBACK_PROTO(arc_shrinker_func, cb, nr_to_scan, gfp_mask)
|
||||||
|
{
|
||||||
|
arc_reclaim_strategy_t strategy;
|
||||||
|
int arc_reclaim;
|
||||||
|
|
||||||
|
/* Not allowed to perform filesystem reclaim */
|
||||||
|
if (!(gfp_mask & __GFP_FS))
|
||||||
|
return (-1);
|
||||||
|
|
||||||
|
/* Return number of reclaimable pages based on arc_shrink_shift */
|
||||||
|
arc_reclaim = btop((arc_size - arc_c_min)) >> arc_shrink_shift;
|
||||||
|
if (nr_to_scan == 0)
|
||||||
|
return (arc_reclaim);
|
||||||
|
|
||||||
|
/* Reclaim in progress */
|
||||||
|
if (mutex_tryenter(&arc_reclaim_thr_lock) == 0)
|
||||||
|
return (-1);
|
||||||
|
|
||||||
|
if (current_is_kswapd()) {
|
||||||
|
strategy = ARC_RECLAIM_CONS;
|
||||||
|
ARCSTAT_INCR(arcstat_memory_indirect_count, 1);
|
||||||
|
} else {
|
||||||
|
strategy = ARC_RECLAIM_AGGR;
|
||||||
|
ARCSTAT_INCR(arcstat_memory_direct_count, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
arc_kmem_reap_now(strategy);
|
||||||
|
arc_reclaim = btop((arc_size - arc_c_min)) >> arc_shrink_shift;
|
||||||
|
mutex_exit(&arc_reclaim_thr_lock);
|
||||||
|
|
||||||
|
return (arc_reclaim);
|
||||||
|
}
|
||||||
|
|
||||||
|
SPL_SHRINKER_DECLARE(arc_shrinker, arc_shrinker_func, DEFAULT_SEEKS);
|
||||||
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Adapt arc info given the number of bytes we are trying to add and
|
* Adapt arc info given the number of bytes we are trying to add and
|
||||||
* the state that we are coming from. This function is only called
|
* the state that we are coming from. This function is only called
|
||||||
@ -3485,6 +3534,12 @@ arc_init(void)
|
|||||||
* need to limit the cache to 1/8 of VM size.
|
* need to limit the cache to 1/8 of VM size.
|
||||||
*/
|
*/
|
||||||
arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8);
|
arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8);
|
||||||
|
/*
|
||||||
|
* Register a shrinker to support synchronous (direct) memory
|
||||||
|
* reclaim from the arc. This is done to prevent kswapd from
|
||||||
|
* swapping out pages when it is preferable to shrink the arc.
|
||||||
|
*/
|
||||||
|
spl_register_shrinker(&arc_shrinker);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* set min cache to 1/32 of all memory, or 64MB, whichever is more */
|
/* set min cache to 1/32 of all memory, or 64MB, whichever is more */
|
||||||
@ -3602,6 +3657,10 @@ void
|
|||||||
arc_fini(void)
|
arc_fini(void)
|
||||||
{
|
{
|
||||||
mutex_enter(&arc_reclaim_thr_lock);
|
mutex_enter(&arc_reclaim_thr_lock);
|
||||||
|
#ifdef _KERNEL
|
||||||
|
spl_unregister_shrinker(&arc_shrinker);
|
||||||
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
arc_thread_exit = 1;
|
arc_thread_exit = 1;
|
||||||
while (arc_thread_exit != 0)
|
while (arc_thread_exit != 0)
|
||||||
cv_wait(&arc_reclaim_thr_cv, &arc_reclaim_thr_lock);
|
cv_wait(&arc_reclaim_thr_cv, &arc_reclaim_thr_lock);
|
||||||
|
Loading…
Reference in New Issue
Block a user