diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index f801f257b..6cba7f02c 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -79,6 +79,20 @@ Compressed l2arc_headroom multiplier Default value: \fB200\fR. .RE +.sp +.ne 2 +.na +\fBl2arc_max_block_size\fR (ulong) +.ad +.RS 12n +The maximum block size which may be written to an L2ARC device, after +compression and other factors. This setting is used to prevent a small +number of large blocks from pushing a larger number of small blocks out +of the cache. +.sp +Default value: \fB16,777,216\fR. +.RE + .sp .ne 2 .na diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 4d7bb8f8f..faed67aa4 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -446,6 +446,7 @@ typedef struct arc_stats { kstat_named_t arcstat_l2_writes_done; kstat_named_t arcstat_l2_writes_error; kstat_named_t arcstat_l2_writes_lock_retry; + kstat_named_t arcstat_l2_writes_skip_toobig; kstat_named_t arcstat_l2_evict_lock_retry; kstat_named_t arcstat_l2_evict_reading; kstat_named_t arcstat_l2_evict_l1cached; @@ -542,6 +543,7 @@ static arc_stats_t arc_stats = { { "l2_writes_done", KSTAT_DATA_UINT64 }, { "l2_writes_error", KSTAT_DATA_UINT64 }, { "l2_writes_lock_retry", KSTAT_DATA_UINT64 }, + { "l2_writes_skip_toobig", KSTAT_DATA_UINT64 }, { "l2_evict_lock_retry", KSTAT_DATA_UINT64 }, { "l2_evict_reading", KSTAT_DATA_UINT64 }, { "l2_evict_l1cached", KSTAT_DATA_UINT64 }, @@ -726,6 +728,8 @@ uint64_t zfs_crc64_table[256]; #define L2ARC_WRITE_SIZE (8 * 1024 * 1024) /* initial write max */ #define L2ARC_HEADROOM 2 /* num of writes */ +#define L2ARC_MAX_BLOCK_SIZE (16 * 1024 * 1024) /* max compress size */ + /* * If we discover during ARC scan any buffers to be compressed, we boost * our headroom for the next scanning cycle by this percentage multiple. @@ -734,6 +738,7 @@ uint64_t zfs_crc64_table[256]; #define L2ARC_FEED_SECS 1 /* caching interval secs */ #define L2ARC_FEED_MIN_MS 200 /* min caching interval ms */ + /* * Used to distinguish headers that are being process by * l2arc_write_buffers(), but have yet to be assigned to a l2arc disk @@ -752,6 +757,7 @@ unsigned long l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ unsigned long l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */ unsigned long l2arc_headroom_boost = L2ARC_HEADROOM_BOOST; +unsigned long l2arc_max_block_size = L2ARC_MAX_BLOCK_SIZE; unsigned long l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ @@ -6020,7 +6026,20 @@ top: */ l2arc_release_cdata_buf(hdr); - if (zio->io_error != 0) { + /* + * Skipped - drop L2ARC entry and mark the header as no + * longer L2 eligibile. + */ + if (hdr->b_l2hdr.b_daddr == L2ARC_ADDR_UNSET) { + list_remove(buflist, hdr); + hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR; + hdr->b_flags &= ~ARC_FLAG_L2CACHE; + + ARCSTAT_BUMP(arcstat_l2_writes_skip_toobig); + + (void) refcount_remove_many(&dev->l2ad_alloc, + hdr->b_l2hdr.b_asize, hdr); + } else if (zio->io_error != 0) { /* * Error - drop L2ARC entry. */ @@ -6567,6 +6586,16 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, if (buf_sz != 0) { uint64_t buf_a_sz; + /* + * Buffers which are larger than l2arc_max_block_size + * after compression are skipped and removed from L2 + * eligibility. + */ + if (buf_sz > l2arc_max_block_size) { + hdr->b_l2hdr.b_daddr = L2ARC_ADDR_UNSET; + continue; + } + wzio = zio_write_phys(pio, dev->l2ad_vdev, dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF, NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE, @@ -7129,6 +7158,9 @@ MODULE_PARM_DESC(l2arc_headroom, "Number of max device writes to precache"); module_param(l2arc_headroom_boost, ulong, 0644); MODULE_PARM_DESC(l2arc_headroom_boost, "Compressed l2arc_headroom multiplier"); +module_param(l2arc_max_block_size, ulong, 0644); +MODULE_PARM_DESC(l2arc_max_block_size, "Skip L2ARC buffers larger than N"); + module_param(l2arc_feed_secs, ulong, 0644); MODULE_PARM_DESC(l2arc_feed_secs, "Seconds between L2ARC writing");