mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-26 12:12:13 +03:00
Illumos #1051: zfs should handle imbalanced luns
Today zfs tries to allocate blocks evenly across all devices. This means that when devices are imbalanced, zfs will use lots of CPU searching for space on devices which tend to be pretty full. It should instead fail quickly on the full LUNs and move on to devices which have more availability.

Reviewed by: Eric Schrock <Eric.Schrock@delphix.com>
Reviewed by: Matt Ahrens <Matt.Ahrens@delphix.com>
Reviewed by: Adam Leventhal <Adam.Leventhal@delphix.com>
Reviewed by: Albert Lee <trisk@nexenta.com>
Reviewed by: Gordon Ross <gwr@nexenta.com>
Approved by: Garrett D'Amore <garrett@nexenta.com>

References to Illumos issue and patch:
- https://www.illumos.org/issues/510
- https://github.com/illumos/illumos-gate/commit/5ead3ed965

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #340
This commit is contained in:
committed by
Brian Behlendorf
parent
bb939d1085
commit
6d974228ef
+21
-1
@@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@@ -79,6 +80,7 @@ int zio_delay_max = ZIO_DELAY_MAX;
|
||||
#ifdef _KERNEL
|
||||
extern vmem_t *zio_alloc_arena;
|
||||
#endif
|
||||
extern int zfs_mg_alloc_failures;
|
||||
|
||||
/*
|
||||
* An allocating zio is one that either currently has the DVA allocate
|
||||
@@ -158,6 +160,12 @@ zio_init(void)
|
||||
zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
|
||||
}
|
||||
|
||||
/*
|
||||
* The zio write taskqs have 1 thread per cpu; allow 1/2 of the taskqs
|
||||
* to fail 3 times per txg or 8 failures, whichever is greater.
|
||||
*/
|
||||
zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
|
||||
|
||||
zio_inject_init();
|
||||
}
|
||||
|
||||
@@ -2151,6 +2159,7 @@ zio_dva_allocate(zio_t *zio)
|
||||
metaslab_class_t *mc = spa_normal_class(spa);
|
||||
blkptr_t *bp = zio->io_bp;
|
||||
int error;
|
||||
int flags = 0;
|
||||
|
||||
if (zio->io_gang_leader == NULL) {
|
||||
ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
|
||||
@@ -2163,10 +2172,21 @@ zio_dva_allocate(zio_t *zio)
|
||||
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
|
||||
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
|
||||
|
||||
/*
|
||||
* The dump device does not support gang blocks so allocation on
|
||||
* behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid
|
||||
* the "fast" gang feature.
|
||||
*/
|
||||
flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
|
||||
flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
|
||||
METASLAB_GANG_CHILD : 0;
|
||||
error = metaslab_alloc(spa, mc, zio->io_size, bp,
|
||||
zio->io_prop.zp_copies, zio->io_txg, NULL, 0);
|
||||
zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
|
||||
|
||||
if (error) {
|
||||
spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
|
||||
"size %llu, error %d", spa_name(spa), zio, zio->io_size,
|
||||
error);
|
||||
if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE)
|
||||
return (zio_write_gang_block(zio));
|
||||
zio->io_error = error;
|
||||
|
||||
Reference in New Issue
Block a user