mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 19:28:53 +03:00
Illumos 5269 - zpool import slow
5269 zpool import slow Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: George Wilson <george@delphix.com> Reviewed by: Dan McDonald <danmcd@omniti.com> Approved by: Dan McDonald <danmcd@omniti.com> References: https://www.illumos.org/issues/5269 https://github.com/illumos/illumos-gate/commit/12380e1e Ported-by: DHE <git@dehacked.net> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #3396
This commit is contained in:
committed by
Brian Behlendorf
parent
d050c627b5
commit
9c43027b3f
+182
-39
@@ -24,6 +24,7 @@
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
* Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
@@ -48,6 +49,7 @@
|
||||
#include <sys/sa.h>
|
||||
#include <sys/zfs_onexit.h>
|
||||
#include <sys/dsl_destroy.h>
|
||||
#include <sys/vdev.h>
|
||||
|
||||
/*
|
||||
* Needed to close a window in dnode_move() that allows the objset to be freed
|
||||
@@ -55,6 +57,16 @@
|
||||
*/
|
||||
krwlock_t os_lock;
|
||||
|
||||
/*
|
||||
* Tunable to overwrite the maximum number of threads for the parallization
|
||||
* of dmu_objset_find_dp, needed to speed up the import of pools with many
|
||||
* datasets.
|
||||
* Default is 4 times the number of leaf vdevs.
|
||||
*/
|
||||
int dmu_find_threads = 0;
|
||||
|
||||
static void dmu_objset_find_dp_cb(void *arg);
|
||||
|
||||
void
|
||||
dmu_objset_init(void)
|
||||
{
|
||||
@@ -504,6 +516,25 @@ dmu_objset_hold(const char *name, void *tag, objset_t **osp)
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
|
||||
boolean_t readonly, void *tag, objset_t **osp)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = dmu_objset_from_ds(ds, osp);
|
||||
if (err != 0) {
|
||||
dsl_dataset_disown(ds, tag);
|
||||
} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
|
||||
dsl_dataset_disown(ds, tag);
|
||||
return (SET_ERROR(EINVAL));
|
||||
} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
|
||||
dsl_dataset_disown(ds, tag);
|
||||
return (SET_ERROR(EROFS));
|
||||
}
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* dsl_pool must not be held when this is called.
|
||||
* Upon successful return, there will be a longhold on the dataset,
|
||||
@@ -525,21 +556,26 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
|
||||
dsl_pool_rele(dp, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
err = dmu_objset_from_ds(ds, osp);
|
||||
err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
|
||||
dsl_pool_rele(dp, FTAG);
|
||||
if (err != 0) {
|
||||
dsl_dataset_disown(ds, tag);
|
||||
} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
|
||||
dsl_dataset_disown(ds, tag);
|
||||
return (SET_ERROR(EINVAL));
|
||||
} else if (!readonly && ds->ds_is_snapshot) {
|
||||
dsl_dataset_disown(ds, tag);
|
||||
return (SET_ERROR(EROFS));
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
|
||||
boolean_t readonly, void *tag, objset_t **osp)
|
||||
{
|
||||
dsl_dataset_t *ds;
|
||||
int err;
|
||||
|
||||
err = dsl_dataset_own_obj(dp, obj, tag, &ds);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
return (dmu_objset_own_impl(ds, type, readonly, tag, osp));
|
||||
}
|
||||
|
||||
void
|
||||
dmu_objset_rele(objset_t *os, void *tag)
|
||||
{
|
||||
@@ -1618,30 +1654,41 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name,
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find objsets under and including ddobj, call func(ds) on each.
|
||||
*/
|
||||
int
|
||||
dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
|
||||
int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
|
||||
typedef struct dmu_objset_find_ctx {
|
||||
taskq_t *dc_tq;
|
||||
dsl_pool_t *dc_dp;
|
||||
uint64_t dc_ddobj;
|
||||
int (*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *);
|
||||
void *dc_arg;
|
||||
int dc_flags;
|
||||
kmutex_t *dc_error_lock;
|
||||
int *dc_error;
|
||||
} dmu_objset_find_ctx_t;
|
||||
|
||||
static void
|
||||
dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
|
||||
{
|
||||
dsl_pool_t *dp = dcp->dc_dp;
|
||||
dmu_objset_find_ctx_t *child_dcp;
|
||||
dsl_dir_t *dd;
|
||||
dsl_dataset_t *ds;
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t *attr;
|
||||
uint64_t thisobj;
|
||||
int err;
|
||||
int err = 0;
|
||||
|
||||
ASSERT(dsl_pool_config_held(dp));
|
||||
/* don't process if there already was an error */
|
||||
if (*dcp->dc_error != 0)
|
||||
goto out;
|
||||
|
||||
err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
|
||||
err = dsl_dir_hold_obj(dp, dcp->dc_ddobj, NULL, FTAG, &dd);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
goto out;
|
||||
|
||||
/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
|
||||
if (dd->dd_myname[0] == '$') {
|
||||
dsl_dir_rele(dd, FTAG);
|
||||
return (0);
|
||||
goto out;
|
||||
}
|
||||
|
||||
thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
|
||||
@@ -1650,7 +1697,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
|
||||
/*
|
||||
* Iterate over all children.
|
||||
*/
|
||||
if (flags & DS_FIND_CHILDREN) {
|
||||
if (dcp->dc_flags & DS_FIND_CHILDREN) {
|
||||
for (zap_cursor_init(&zc, dp->dp_meta_objset,
|
||||
dsl_dir_phys(dd)->dd_child_dir_zapobj);
|
||||
zap_cursor_retrieve(&zc, attr) == 0;
|
||||
@@ -1659,24 +1706,22 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
|
||||
sizeof (uint64_t));
|
||||
ASSERT3U(attr->za_num_integers, ==, 1);
|
||||
|
||||
err = dmu_objset_find_dp(dp, attr->za_first_integer,
|
||||
func, arg, flags);
|
||||
if (err != 0)
|
||||
break;
|
||||
child_dcp = kmem_alloc(sizeof (*child_dcp), KM_SLEEP);
|
||||
*child_dcp = *dcp;
|
||||
child_dcp->dc_ddobj = attr->za_first_integer;
|
||||
if (dcp->dc_tq != NULL)
|
||||
(void) taskq_dispatch(dcp->dc_tq,
|
||||
dmu_objset_find_dp_cb, child_dcp, TQ_SLEEP);
|
||||
else
|
||||
dmu_objset_find_dp_impl(child_dcp);
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
|
||||
if (err != 0) {
|
||||
dsl_dir_rele(dd, FTAG);
|
||||
kmem_free(attr, sizeof (zap_attribute_t));
|
||||
return (err);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over all snapshots.
|
||||
*/
|
||||
if (flags & DS_FIND_SNAPSHOTS) {
|
||||
if (dcp->dc_flags & DS_FIND_SNAPSHOTS) {
|
||||
dsl_dataset_t *ds;
|
||||
err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
|
||||
|
||||
@@ -1697,7 +1742,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
|
||||
attr->za_first_integer, FTAG, &ds);
|
||||
if (err != 0)
|
||||
break;
|
||||
err = func(dp, ds, arg);
|
||||
err = dcp->dc_func(dp, ds, dcp->dc_arg);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
if (err != 0)
|
||||
break;
|
||||
@@ -1710,17 +1755,115 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
|
||||
kmem_free(attr, sizeof (zap_attribute_t));
|
||||
|
||||
if (err != 0)
|
||||
return (err);
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Apply to self.
|
||||
*/
|
||||
err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = func(dp, ds, arg);
|
||||
goto out;
|
||||
err = dcp->dc_func(dp, ds, dcp->dc_arg);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (err);
|
||||
|
||||
out:
|
||||
if (err != 0) {
|
||||
mutex_enter(dcp->dc_error_lock);
|
||||
/* only keep first error */
|
||||
if (*dcp->dc_error == 0)
|
||||
*dcp->dc_error = err;
|
||||
mutex_exit(dcp->dc_error_lock);
|
||||
}
|
||||
|
||||
kmem_free(dcp, sizeof (*dcp));
|
||||
}
|
||||
|
||||
static void
|
||||
dmu_objset_find_dp_cb(void *arg)
|
||||
{
|
||||
dmu_objset_find_ctx_t *dcp = arg;
|
||||
dsl_pool_t *dp = dcp->dc_dp;
|
||||
|
||||
dsl_pool_config_enter(dp, FTAG);
|
||||
|
||||
dmu_objset_find_dp_impl(dcp);
|
||||
|
||||
dsl_pool_config_exit(dp, FTAG);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find objsets under and including ddobj, call func(ds) on each.
|
||||
* The order for the enumeration is completely undefined.
|
||||
* func is called with dsl_pool_config held.
|
||||
*/
|
||||
int
|
||||
dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
|
||||
int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
|
||||
{
|
||||
int error = 0;
|
||||
taskq_t *tq = NULL;
|
||||
int ntasks;
|
||||
dmu_objset_find_ctx_t *dcp;
|
||||
kmutex_t err_lock;
|
||||
|
||||
mutex_init(&err_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
dcp = kmem_alloc(sizeof (*dcp), KM_SLEEP);
|
||||
dcp->dc_tq = NULL;
|
||||
dcp->dc_dp = dp;
|
||||
dcp->dc_ddobj = ddobj;
|
||||
dcp->dc_func = func;
|
||||
dcp->dc_arg = arg;
|
||||
dcp->dc_flags = flags;
|
||||
dcp->dc_error_lock = &err_lock;
|
||||
dcp->dc_error = &error;
|
||||
|
||||
if ((flags & DS_FIND_SERIALIZE) || dsl_pool_config_held_writer(dp)) {
|
||||
/*
|
||||
* In case a write lock is held we can't make use of
|
||||
* parallelism, as down the stack of the worker threads
|
||||
* the lock is asserted via dsl_pool_config_held.
|
||||
* In case of a read lock this is solved by getting a read
|
||||
* lock in each worker thread, which isn't possible in case
|
||||
* of a writer lock. So we fall back to the synchronous path
|
||||
* here.
|
||||
* In the future it might be possible to get some magic into
|
||||
* dsl_pool_config_held in a way that it returns true for
|
||||
* the worker threads so that a single lock held from this
|
||||
* thread suffices. For now, stay single threaded.
|
||||
*/
|
||||
dmu_objset_find_dp_impl(dcp);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
ntasks = dmu_find_threads;
|
||||
if (ntasks == 0)
|
||||
ntasks = vdev_count_leaves(dp->dp_spa) * 4;
|
||||
tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks,
|
||||
INT_MAX, 0);
|
||||
if (tq == NULL) {
|
||||
kmem_free(dcp, sizeof (*dcp));
|
||||
return (SET_ERROR(ENOMEM));
|
||||
}
|
||||
dcp->dc_tq = tq;
|
||||
|
||||
/* dcp will be freed by task */
|
||||
(void) taskq_dispatch(tq, dmu_objset_find_dp_cb, dcp, TQ_SLEEP);
|
||||
|
||||
/*
|
||||
* PORTING: this code relies on the property of taskq_wait to wait
|
||||
* until no more tasks are queued and no more tasks are active. As
|
||||
* we always queue new tasks from within other tasks, task_wait
|
||||
* reliably waits for the full recursion to finish, even though we
|
||||
* enqueue new tasks after taskq_wait has been called.
|
||||
* On platforms other than illumos, taskq_wait may not have this
|
||||
* property.
|
||||
*/
|
||||
taskq_wait(tq);
|
||||
taskq_destroy(tq);
|
||||
mutex_destroy(&err_lock);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user