OpenZFS 7303 - dynamic metaslab selection

This change introduces a new weighting algorithm to improve
metaslab selection. The new weighting algorithm relies on the
SPACEMAP_HISTOGRAM feature. As a result, the metaslab weight
now encodes the type of weighting algorithm used (size-based
vs segment-based).

Porting Notes: The metaslab allocation tracing code is conditionally
removed on linux (dependent on mdb debugger).

Authored by: George Wilson <george.wilson@delphix.com>
Reviewed by: Alex Reece <alex@delphix.com>
Reviewed by: Chris Siden <christopher.siden@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Paul Dagnelie <paul.dagnelie@delphix.com>
Reviewed by: Pavel Zakharov pavel.zakharov@delphix.com
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Don Brady <don.brady@intel.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: Don Brady <don.brady@intel.com>

OpenZFS-issue: https://www.illumos.org/issues/7303
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/d5190931bd
Closes #5404
This commit is contained in:
Don Brady
2017-01-12 12:52:56 -07:00
committed by Brian Behlendorf
parent 5727b00e06
commit 4e21fd060a
13 changed files with 1042 additions and 226 deletions
+837 -201
View File
File diff suppressed because it is too large Load Diff
+14 -1
View File
@@ -1313,7 +1313,7 @@ spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
static void
spa_unload(spa_t *spa)
{
int i;
int i, c;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
@@ -1330,6 +1330,19 @@ spa_unload(spa_t *spa)
spa->spa_sync_on = B_FALSE;
}
/*
* Even though vdev_free() also calls vdev_metaslab_fini, we need
* to call it earlier, before we wait for async i/o to complete.
* This ensures that there is no async metaslab prefetching, by
* calling taskq_wait(mg_taskq).
*/
if (spa->spa_root_vdev != NULL) {
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
for (c = 0; c < spa->spa_root_vdev->vdev_children; c++)
vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]);
spa_config_exit(spa, SCL_ALL, FTAG);
}
/*
* Wait for any outstanding async I/O to complete.
*/
+2
View File
@@ -1833,6 +1833,7 @@ spa_init(int mode)
refcount_init();
unique_init();
range_tree_init();
metaslab_alloc_trace_init();
ddt_init();
zio_init();
dmu_init();
@@ -1861,6 +1862,7 @@ spa_fini(void)
dmu_fini();
zio_fini();
ddt_fini();
metaslab_alloc_trace_fini();
range_tree_fini();
unique_fini();
refcount_fini();
-1
View File
@@ -173,7 +173,6 @@ space_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx)
dmu_buf_will_dirty(sm->sm_dbuf, tx);
ASSERT(space_map_histogram_verify(sm, rt));
/*
* Transfer the content of the range tree histogram to the space
* map histogram. The space map histogram contains 32 buckets ranging
+14 -4
View File
@@ -596,6 +596,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
offsetof(zio_link_t, zl_parent_node));
list_create(&zio->io_child_list, sizeof (zio_link_t),
offsetof(zio_link_t, zl_child_node));
metaslab_trace_init(&zio->io_alloc_list);
if (vd != NULL)
zio->io_child_type = ZIO_CHILD_VDEV;
@@ -657,6 +658,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
static void
zio_destroy(zio_t *zio)
{
metaslab_trace_fini(&zio->io_alloc_list);
list_destroy(&zio->io_parent_list);
list_destroy(&zio->io_child_list);
mutex_destroy(&zio->io_lock);
@@ -2299,7 +2301,8 @@ zio_write_gang_block(zio_t *pio)
}
error = metaslab_alloc(spa, mc, SPA_GANGBLOCKSIZE,
bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, flags, pio);
bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, flags,
&pio->io_alloc_list, pio);
if (error) {
if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
@@ -3011,7 +3014,8 @@ zio_dva_allocate(zio_t *zio)
flags |= METASLAB_ASYNC_ALLOC;
error = metaslab_alloc(spa, mc, zio->io_size, bp,
zio->io_prop.zp_copies, zio->io_txg, NULL, flags, zio);
zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
&zio->io_alloc_list, zio);
if (error != 0) {
spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
@@ -3077,18 +3081,24 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size,
boolean_t use_slog)
{
int error = 1;
zio_alloc_list_t io_alloc_list;
ASSERT(txg > spa_syncing_txg(spa));
metaslab_trace_init(&io_alloc_list);
if (use_slog) {
error = metaslab_alloc(spa, spa_log_class(spa), size,
new_bp, 1, txg, NULL, METASLAB_FASTWRITE, NULL);
new_bp, 1, txg, NULL, METASLAB_FASTWRITE,
&io_alloc_list, NULL);
}
if (error) {
error = metaslab_alloc(spa, spa_normal_class(spa), size,
new_bp, 1, txg, NULL, METASLAB_FASTWRITE, NULL);
new_bp, 1, txg, NULL, METASLAB_FASTWRITE,
&io_alloc_list, NULL);
}
metaslab_trace_fini(&io_alloc_list);
if (error == 0) {
BP_SET_LSIZE(new_bp, size);