Update to onnv_147

This is the last official OpenSolaris tag before the public
development tree was closed.
This commit is contained in:
Brian Behlendorf
2010-08-26 14:24:34 -07:00
parent 1980602bfa
commit 572e285762
101 changed files with 7835 additions and 2448 deletions
+369 -152
View File
@@ -116,6 +116,7 @@ static boolean_t spa_has_active_shared_spare(spa_t *spa);
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
char **ereport);
static void spa_vdev_resilver_done(spa_t *spa);
uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */
id_t zio_taskq_psrset_bind = PS_NONE;
@@ -180,6 +181,8 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
size - alloc, src);
spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL,
(spa_mode(spa) == FREAD), src);
cap = (size == 0) ? 0 : (alloc * 100 / size);
spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);
@@ -529,7 +532,9 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
nvpair_name(elem))) == ZPROP_INVAL)
return (EINVAL);
if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT)
if (prop == ZPOOL_PROP_CACHEFILE ||
prop == ZPOOL_PROP_ALTROOT ||
prop == ZPOOL_PROP_READONLY)
continue;
need_sync = B_TRUE;
@@ -1284,33 +1289,131 @@ spa_check_removed(vdev_t *vd)
}
/*
* Load the slog device state from the config object since it's possible
* that the label does not contain the most up-to-date information.
* Validate the current config against the MOS config
*/
void
spa_load_log_state(spa_t *spa, nvlist_t *nv)
static boolean_t
spa_config_valid(spa_t *spa, nvlist_t *config)
{
vdev_t *ovd, *rvd = spa->spa_root_vdev;
vdev_t *mrvd, *rvd = spa->spa_root_vdev;
nvlist_t *nv;
VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0);
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);
ASSERT3U(rvd->vdev_children, ==, mrvd->vdev_children);
/*
* Load the original root vdev tree from the passed config.
* If we're doing a normal import, then build up any additional
* diagnostic information about missing devices in this config.
* We'll pass this up to the user for further processing.
*/
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);
if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) {
nvlist_t **child, *nv;
uint64_t idx = 0;
for (int c = 0; c < rvd->vdev_children; c++) {
vdev_t *cvd = rvd->vdev_child[c];
if (cvd->vdev_islog)
vdev_load_log_state(cvd, ovd->vdev_child[c]);
child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **),
KM_SLEEP);
VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
for (int c = 0; c < rvd->vdev_children; c++) {
vdev_t *tvd = rvd->vdev_child[c];
vdev_t *mtvd = mrvd->vdev_child[c];
if (tvd->vdev_ops == &vdev_missing_ops &&
mtvd->vdev_ops != &vdev_missing_ops &&
mtvd->vdev_islog)
child[idx++] = vdev_config_generate(spa, mtvd,
B_FALSE, 0);
}
if (idx) {
VERIFY(nvlist_add_nvlist_array(nv,
ZPOOL_CONFIG_CHILDREN, child, idx) == 0);
VERIFY(nvlist_add_nvlist(spa->spa_load_info,
ZPOOL_CONFIG_MISSING_DEVICES, nv) == 0);
for (int i = 0; i < idx; i++)
nvlist_free(child[i]);
}
nvlist_free(nv);
kmem_free(child, rvd->vdev_children * sizeof (char **));
}
vdev_free(ovd);
/*
* Compare the root vdev tree with the information we have
* from the MOS config (mrvd). Check each top-level vdev
* with the corresponding MOS config top-level (mtvd).
*/
for (int c = 0; c < rvd->vdev_children; c++) {
vdev_t *tvd = rvd->vdev_child[c];
vdev_t *mtvd = mrvd->vdev_child[c];
/*
* Resolve any "missing" vdevs in the current configuration.
* If we find that the MOS config has more accurate information
* about the top-level vdev then use that vdev instead.
*/
if (tvd->vdev_ops == &vdev_missing_ops &&
mtvd->vdev_ops != &vdev_missing_ops) {
if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG))
continue;
/*
* Device specific actions.
*/
if (mtvd->vdev_islog) {
spa_set_log_state(spa, SPA_LOG_CLEAR);
} else {
/*
* XXX - once we have 'readonly' pool
* support we should be able to handle
* missing data devices by transitioning
* the pool to readonly.
*/
continue;
}
/*
* Swap the missing vdev with the data we were
* able to obtain from the MOS config.
*/
vdev_remove_child(rvd, tvd);
vdev_remove_child(mrvd, mtvd);
vdev_add_child(rvd, mtvd);
vdev_add_child(mrvd, tvd);
spa_config_exit(spa, SCL_ALL, FTAG);
vdev_load(mtvd);
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
vdev_reopen(rvd);
} else if (mtvd->vdev_islog) {
/*
* Load the slog device's state from the MOS config
* since it's possible that the label does not
* contain the most up-to-date information.
*/
vdev_load_log_state(tvd, mtvd);
vdev_reopen(tvd);
}
}
vdev_free(mrvd);
spa_config_exit(spa, SCL_ALL, FTAG);
/*
* Ensure we were able to validate the config.
*/
return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum);
}
/*
* Check for missing log devices
*/
int
static int
spa_check_logs(spa_t *spa)
{
switch (spa->spa_log_state) {
@@ -1474,9 +1577,19 @@ spa_load_verify(spa_t *spa)
if (!error && sle.sle_meta_count <= policy.zrp_maxmeta &&
sle.sle_data_count <= policy.zrp_maxdata) {
int64_t loss = 0;
verify_ok = B_TRUE;
spa->spa_load_txg = spa->spa_uberblock.ub_txg;
spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;
loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts;
VERIFY(nvlist_add_uint64(spa->spa_load_info,
ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0);
VERIFY(nvlist_add_int64(spa->spa_load_info,
ZPOOL_CONFIG_REWIND_TIME, loss) == 0);
VERIFY(nvlist_add_uint64(spa->spa_load_info,
ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0);
} else {
spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
}
@@ -1635,13 +1748,21 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
KM_SLEEP) == 0);
}
gethrestime(&spa->spa_loaded_ts);
error = spa_load_impl(spa, pool_guid, config, state, type,
mosconfig, &ereport);
}
spa->spa_minref = refcount_count(&spa->spa_refcount);
if (error && error != EBADF)
zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
if (error) {
if (error != EEXIST) {
spa->spa_loaded_ts.tv_sec = 0;
spa->spa_loaded_ts.tv_nsec = 0;
}
if (error != EBADF) {
zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
}
}
spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
spa->spa_ena = 0;
@@ -1661,7 +1782,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
nvlist_t *nvroot = NULL;
vdev_t *rvd;
uberblock_t *ub = &spa->spa_uberblock;
uint64_t config_cache_txg = spa->spa_config_txg;
uint64_t children, config_cache_txg = spa->spa_config_txg;
int orig_mode = spa->spa_mode;
int parse;
uint64_t obj;
@@ -1760,9 +1881,13 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
/*
* If the vdev guid sum doesn't match the uberblock, we have an
* incomplete configuration.
* incomplete configuration. We first check to see if the pool
* is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN).
* If it is, defer the vdev_guid_sum check till later so we
* can handle missing vdevs.
*/
if (mosconfig && type != SPA_IMPORT_ASSEMBLE &&
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
&children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE &&
rvd->vdev_guid_sum != ub->ub_guid_sum)
return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO));
@@ -1981,13 +2106,6 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
spa_config_exit(spa, SCL_ALL, FTAG);
/*
* Check the state of the root vdev. If it can't be opened, it
* indicates one or more toplevel vdevs are faulted.
*/
if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
return (ENXIO);
/*
* Load the DDTs (dedup tables).
*/
@@ -1997,16 +2115,12 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
spa_update_dspace(spa);
if (state != SPA_LOAD_TRYIMPORT) {
error = spa_load_verify(spa);
if (error)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
error));
}
/*
* Load the intent log state and check log integrity. If we're
* assembling a pool from a split, the log is not transferred over.
* Validate the config, using the MOS config to fill in any
* information which might be missing. If we fail to validate
* the config then declare the pool unfit for use. If we're
* assembling a pool from a split, the log is not transferred
* over.
*/
if (type != SPA_IMPORT_ASSEMBLE) {
nvlist_t *nvconfig;
@@ -2014,17 +2128,37 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
spa_load_log_state(spa, nvroot);
if (!spa_config_valid(spa, nvconfig)) {
nvlist_free(nvconfig);
return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM,
ENXIO));
}
nvlist_free(nvconfig);
/*
* Now that we've validate the config, check the state of the
* root vdev. If it can't be opened, it indicates one or
* more toplevel vdevs are faulted.
*/
if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
return (ENXIO);
if (spa_check_logs(spa)) {
*ereport = FM_EREPORT_ZFS_LOG_REPLAY;
return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO));
}
}
/*
* We've successfully opened the pool, verify that we're ready
* to start pushing transactions.
*/
if (state != SPA_LOAD_TRYIMPORT) {
if (error = spa_load_verify(spa))
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
error));
}
if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
spa->spa_load_max_txg == UINT64_MAX)) {
dmu_tx_t *tx;
@@ -2066,12 +2200,13 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
* If the config cache is stale, or we have uninitialized
* metaslabs (see spa_vdev_add()), then update the config.
*
* If spa_load_verbatim is true, trust the current
* If this is a verbatim import, trust the current
* in-core spa_config and update the disk labels.
*/
if (config_cache_txg != spa->spa_config_txg ||
state == SPA_LOAD_IMPORT || spa->spa_load_verbatim ||
state == SPA_LOAD_RECOVER)
state == SPA_LOAD_IMPORT ||
state == SPA_LOAD_RECOVER ||
(spa->spa_import_flags & ZFS_IMPORT_VERBATIM))
need_update = B_TRUE;
for (int c = 0; c < rvd->vdev_children; c++)
@@ -2110,12 +2245,14 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
static int
spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
{
int mode = spa->spa_mode;
spa_unload(spa);
spa_deactivate(spa);
spa->spa_load_max_txg--;
spa_activate(spa, spa_mode_global);
spa_activate(spa, mode);
spa_async_suspend(spa);
return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
@@ -2173,9 +2310,6 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
rewind_error = spa_load_retry(spa, state, mosconfig);
}
if (config)
spa_rewind_data_to_nvlist(spa, config);
spa->spa_extreme_rewind = B_FALSE;
spa->spa_load_max_txg = UINT64_MAX;
@@ -2202,6 +2336,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
nvlist_t **config)
{
spa_t *spa;
spa_load_state_t state = SPA_LOAD_OPEN;
int error;
int locked = B_FALSE;
@@ -2225,7 +2360,6 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
}
if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
spa_load_state_t state = SPA_LOAD_OPEN;
zpool_rewind_policy_t policy;
zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config,
@@ -2264,9 +2398,13 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
* information: the state of each vdev after the
* attempted vdev_open(). Return this to the user.
*/
if (config != NULL && spa->spa_config)
if (config != NULL && spa->spa_config) {
VERIFY(nvlist_dup(spa->spa_config, config,
KM_SLEEP) == 0);
VERIFY(nvlist_add_nvlist(*config,
ZPOOL_CONFIG_LOAD_INFO,
spa->spa_load_info) == 0);
}
spa_unload(spa);
spa_deactivate(spa);
spa->spa_last_open_failed = error;
@@ -2275,15 +2413,22 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
*spapp = NULL;
return (error);
}
}
spa_open_ref(spa, tag);
if (config != NULL)
*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
/*
* If we've recovered the pool, pass back any information we
* gathered while doing the load.
*/
if (state == SPA_LOAD_RECOVER) {
VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO,
spa->spa_load_info) == 0);
}
if (locked) {
spa->spa_last_open_failed = 0;
spa->spa_last_ubsync_txg = 0;
@@ -2459,6 +2604,13 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
if (*config != NULL) {
uint64_t loadtimes[2];
loadtimes[0] = spa->spa_loaded_ts.tv_sec;
loadtimes[1] = spa->spa_loaded_ts.tv_nsec;
VERIFY(nvlist_add_uint64_array(*config,
ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0);
VERIFY(nvlist_add_uint64(*config,
ZPOOL_CONFIG_ERRCOUNT,
spa_get_errlog_size(spa)) == 0);
@@ -3032,7 +3184,7 @@ spa_import_rootpool(char *devpath, char *devid)
spa = spa_add(pname, config, NULL);
spa->spa_is_root = B_TRUE;
spa->spa_load_verbatim = B_TRUE;
spa->spa_import_flags = ZFS_IMPORT_VERBATIM;
/*
* Build up a vdev tree based on the boot device's label config.
@@ -3081,7 +3233,8 @@ spa_import_rootpool(char *devpath, char *devid)
!bvd->vdev_isspare) {
cmn_err(CE_NOTE, "The boot device is currently spared. Please "
"try booting from '%s'",
bvd->vdev_parent->vdev_child[1]->vdev_path);
bvd->vdev_parent->
vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path);
error = EINVAL;
goto out;
}
@@ -3100,49 +3253,18 @@ out:
#endif
/*
* Take a pool and insert it into the namespace as if it had been loaded at
* boot.
*/
int
spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
{
spa_t *spa;
char *altroot = NULL;
mutex_enter(&spa_namespace_lock);
if (spa_lookup(pool) != NULL) {
mutex_exit(&spa_namespace_lock);
return (EEXIST);
}
(void) nvlist_lookup_string(props,
zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
spa = spa_add(pool, config, altroot);
spa->spa_load_verbatim = B_TRUE;
if (props != NULL)
spa_configfile_set(spa, props, B_FALSE);
spa_config_sync(spa, B_FALSE, B_TRUE);
mutex_exit(&spa_namespace_lock);
spa_history_log_version(spa, LOG_POOL_IMPORT);
return (0);
}
/*
* Import a non-root pool into the system.
*/
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
{
spa_t *spa;
char *altroot = NULL;
spa_load_state_t state = SPA_LOAD_IMPORT;
zpool_rewind_policy_t policy;
uint64_t mode = spa_mode_global;
uint64_t readonly = B_FALSE;
int error;
nvlist_t *nvroot;
nvlist_t **spares, **l2cache;
@@ -3157,23 +3279,45 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
return (EEXIST);
}
zpool_get_rewind_policy(config, &policy);
if (policy.zrp_request & ZPOOL_DO_REWIND)
state = SPA_LOAD_RECOVER;
/*
* Create and initialize the spa structure.
*/
(void) nvlist_lookup_string(props,
zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
(void) nvlist_lookup_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
if (readonly)
mode = FREAD;
spa = spa_add(pool, config, altroot);
spa_activate(spa, spa_mode_global);
spa->spa_import_flags = flags;
/*
* Verbatim import - Take a pool and insert it into the namespace
* as if it had been loaded at boot.
*/
if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) {
if (props != NULL)
spa_configfile_set(spa, props, B_FALSE);
spa_config_sync(spa, B_FALSE, B_TRUE);
mutex_exit(&spa_namespace_lock);
spa_history_log_version(spa, LOG_POOL_IMPORT);
return (0);
}
spa_activate(spa, mode);
/*
* Don't start async tasks until we know everything is healthy.
*/
spa_async_suspend(spa);
zpool_get_rewind_policy(config, &policy);
if (policy.zrp_request & ZPOOL_DO_REWIND)
state = SPA_LOAD_RECOVER;
/*
* Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig
* because the user-supplied config is actually the one to trust when
@@ -3181,14 +3325,16 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
*/
if (state != SPA_LOAD_RECOVER)
spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
policy.zrp_request);
/*
* Propagate anything learned about failing or best txgs
* back to caller
* Propagate anything learned while loading the pool and pass it
* back to caller (i.e. rewind info, missing devices, etc).
*/
spa_rewind_data_to_nvlist(spa, config);
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
spa->spa_load_info) == 0);
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
/*
@@ -3228,6 +3374,8 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
return (error);
}
spa_async_resume(spa);
/*
* Override any spares and level 2 cache devices as specified by
* the user, as these may have correct device names/devids, etc.
@@ -3278,8 +3426,6 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
}
spa_async_resume(spa);
/*
* It's possible that the pool was expanded while it was exported.
* We kick off an async task to handle this for us.
@@ -3542,6 +3688,8 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
ASSERT(spa_writeable(spa));
txg = spa_vdev_enter(spa);
if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
@@ -3653,6 +3801,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
int newvd_isspare;
int error;
ASSERT(spa_writeable(spa));
txg = spa_vdev_enter(spa);
oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);
@@ -3702,7 +3852,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
* spares.
*/
if (pvd->vdev_ops == &vdev_spare_ops &&
pvd->vdev_child[1] == oldvd &&
oldvd->vdev_isspare &&
!spa_has_spare(spa, newvd->vdev_guid))
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
@@ -3714,13 +3864,15 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
* the same (spare replaces spare, non-spare replaces
* non-spare).
*/
if (pvd->vdev_ops == &vdev_replacing_ops)
if (pvd->vdev_ops == &vdev_replacing_ops &&
spa_version(spa) < SPA_VERSION_MULTI_REPLACE) {
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
else if (pvd->vdev_ops == &vdev_spare_ops &&
newvd->vdev_isspare != oldvd->vdev_isspare)
} else if (pvd->vdev_ops == &vdev_spare_ops &&
newvd->vdev_isspare != oldvd->vdev_isspare) {
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
else if (pvd->vdev_ops != &vdev_spare_ops &&
newvd->vdev_isspare)
}
if (newvd->vdev_isspare)
pvops = &vdev_spare_ops;
else
pvops = &vdev_replacing_ops;
@@ -3755,6 +3907,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
}
}
/* mark the device being resilvered */
newvd->vdev_resilvering = B_TRUE;
/*
* If the parent is not a mirror, or if we're replacing, insert the new
* mirror/replacing/spare vdev above oldvd.
@@ -3823,6 +3978,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
spa_strfree(oldvdpath);
spa_strfree(newvdpath);
if (spa->spa_bootfs)
spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH);
return (0);
}
@@ -3840,9 +3998,10 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
vdev_t *vd, *pvd, *cvd, *tvd;
boolean_t unspare = B_FALSE;
uint64_t unspare_guid;
size_t len;
char *vdpath;
ASSERT(spa_writeable(spa));
txg = spa_vdev_enter(spa);
vd = spa_lookup_by_guid(spa, guid, B_FALSE);
@@ -3872,18 +4031,11 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
return (spa_vdev_exit(spa, NULL, txg, EBUSY));
/*
* If replace_done is specified, only remove this device if it's
* the first child of a replacing vdev. For the 'spare' vdev, either
* disk can be removed.
* Only 'replacing' or 'spare' vdevs can be replaced.
*/
if (replace_done) {
if (pvd->vdev_ops == &vdev_replacing_ops) {
if (vd->vdev_id != 0)
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
} else if (pvd->vdev_ops != &vdev_spare_ops) {
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
}
}
if (replace_done && pvd->vdev_ops != &vdev_replacing_ops &&
pvd->vdev_ops != &vdev_spare_ops)
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
spa_version(spa) >= SPA_VERSION_SPARES);
@@ -3910,16 +4062,22 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
* check to see if we changed the original vdev's path to have "/old"
* at the end in spa_vdev_attach(). If so, undo that change now.
*/
if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 &&
pvd->vdev_child[0]->vdev_path != NULL &&
pvd->vdev_child[1]->vdev_path != NULL) {
ASSERT(pvd->vdev_child[1] == vd);
cvd = pvd->vdev_child[0];
len = strlen(vd->vdev_path);
if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
strcmp(cvd->vdev_path + len, "/old") == 0) {
spa_strfree(cvd->vdev_path);
cvd->vdev_path = spa_strdup(vd->vdev_path);
if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 &&
vd->vdev_path != NULL) {
size_t len = strlen(vd->vdev_path);
for (int c = 0; c < pvd->vdev_children; c++) {
cvd = pvd->vdev_child[c];
if (cvd == vd || cvd->vdev_path == NULL)
continue;
if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
strcmp(cvd->vdev_path + len, "/old") == 0) {
spa_strfree(cvd->vdev_path);
cvd->vdev_path = spa_strdup(vd->vdev_path);
break;
}
}
}
@@ -3929,7 +4087,8 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
* active spare list for the pool.
*/
if (pvd->vdev_ops == &vdev_spare_ops &&
vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare)
vd->vdev_id == 0 &&
pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare)
unspare = B_TRUE;
/*
@@ -3951,7 +4110,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
/*
* Remember one of the remaining children so we can get tvd below.
*/
cvd = pvd->vdev_child[0];
cvd = pvd->vdev_child[pvd->vdev_children - 1];
/*
* If we need to remove the remaining child from the list of hot spares,
@@ -3967,14 +4126,20 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
spa_spare_remove(cvd);
unspare_guid = cvd->vdev_guid;
(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
cvd->vdev_unspare = B_TRUE;
}
/*
* If the parent mirror/replacing vdev only has one child,
* the parent is no longer needed. Remove it from the tree.
*/
if (pvd->vdev_children == 1)
if (pvd->vdev_children == 1) {
if (pvd->vdev_ops == &vdev_spare_ops)
cvd->vdev_unspare = B_FALSE;
vdev_remove_parent(cvd);
cvd->vdev_resilvering = B_FALSE;
}
/*
* We don't set tvd until now because the parent we just removed
@@ -4016,6 +4181,9 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);
/* hang on to the spa before we release the lock */
spa_open_ref(spa, FTAG);
error = spa_vdev_exit(spa, vd, txg, 0);
spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL,
@@ -4028,24 +4196,31 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
* list of every other pool.
*/
if (unspare) {
spa_t *myspa = spa;
spa = NULL;
spa_t *altspa = NULL;
mutex_enter(&spa_namespace_lock);
while ((spa = spa_next(spa)) != NULL) {
if (spa->spa_state != POOL_STATE_ACTIVE)
while ((altspa = spa_next(altspa)) != NULL) {
if (altspa->spa_state != POOL_STATE_ACTIVE ||
altspa == spa)
continue;
if (spa == myspa)
continue;
spa_open_ref(spa, FTAG);
spa_open_ref(altspa, FTAG);
mutex_exit(&spa_namespace_lock);
(void) spa_vdev_remove(spa, unspare_guid,
B_TRUE);
(void) spa_vdev_remove(altspa, unspare_guid, B_TRUE);
mutex_enter(&spa_namespace_lock);
spa_close(spa, FTAG);
spa_close(altspa, FTAG);
}
mutex_exit(&spa_namespace_lock);
/* search the rest of the vdevs for spares to remove */
spa_vdev_resilver_done(spa);
}
/* all done with the spa; OK to release */
mutex_enter(&spa_namespace_lock);
spa_close(spa, FTAG);
mutex_exit(&spa_namespace_lock);
return (error);
}
@@ -4066,8 +4241,7 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
vdev_t *rvd, **vml = NULL; /* vdev modify list */
boolean_t activate_slog;
if (!spa_writeable(spa))
return (EROFS);
ASSERT(spa_writeable(spa));
txg = spa_vdev_enter(spa);
@@ -4484,6 +4658,8 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
int error = 0;
boolean_t locked = MUTEX_HELD(&spa_namespace_lock);
ASSERT(spa_writeable(spa));
if (!locked)
txg = spa_vdev_enter(spa);
@@ -4593,11 +4769,18 @@ spa_vdev_resilver_done_hunt(vdev_t *vd)
}
/*
* Check for a completed replacement.
* Check for a completed replacement. We always consider the first
* vdev in the list to be the oldest vdev, and the last one to be
* the newest (see spa_vdev_attach() for how that works). In
* the case where the newest vdev is faulted, we will not automatically
* remove it after a resilver completes. This is OK as it will require
* user intervention to determine which disk the admin wishes to keep.
*/
if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
if (vd->vdev_ops == &vdev_replacing_ops) {
ASSERT(vd->vdev_children > 1);
newvd = vd->vdev_child[vd->vdev_children - 1];
oldvd = vd->vdev_child[0];
newvd = vd->vdev_child[1];
if (vdev_dtl_empty(newvd, DTL_MISSING) &&
vdev_dtl_empty(newvd, DTL_OUTAGE) &&
@@ -4608,16 +4791,41 @@ spa_vdev_resilver_done_hunt(vdev_t *vd)
/*
* Check for a completed resilver with the 'unspare' flag set.
*/
if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) {
newvd = vd->vdev_child[0];
oldvd = vd->vdev_child[1];
if (vd->vdev_ops == &vdev_spare_ops) {
vdev_t *first = vd->vdev_child[0];
vdev_t *last = vd->vdev_child[vd->vdev_children - 1];
if (newvd->vdev_unspare &&
if (last->vdev_unspare) {
oldvd = first;
newvd = last;
} else if (first->vdev_unspare) {
oldvd = last;
newvd = first;
} else {
oldvd = NULL;
}
if (oldvd != NULL &&
vdev_dtl_empty(newvd, DTL_MISSING) &&
vdev_dtl_empty(newvd, DTL_OUTAGE) &&
!vdev_dtl_required(oldvd)) {
newvd->vdev_unspare = 0;
!vdev_dtl_required(oldvd))
return (oldvd);
/*
* If there are more than two spares attached to a disk,
* and those spares are not required, then we want to
* attempt to free them up now so that they can be used
* by other pools. Once we're back down to a single
* disk+spare, we stop removing them.
*/
if (vd->vdev_children > 2) {
newvd = vd->vdev_child[1];
if (newvd->vdev_isspare && last->vdev_isspare &&
vdev_dtl_empty(last, DTL_MISSING) &&
vdev_dtl_empty(last, DTL_OUTAGE) &&
!vdev_dtl_required(newvd))
return (newvd);
}
}
@@ -4644,9 +4852,9 @@ spa_vdev_resilver_done(spa_t *spa)
* we need to detach the parent's first child (the original hot
* spare) as well.
*/
if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) {
if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 &&
ppvd->vdev_children == 2) {
ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
ASSERT(ppvd->vdev_children == 2);
sguid = ppvd->vdev_child[1]->vdev_guid;
}
spa_config_exit(spa, SCL_ALL, FTAG);
@@ -4670,6 +4878,8 @@ spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value,
vdev_t *vd;
boolean_t sync = B_FALSE;
ASSERT(spa_writeable(spa));
spa_vdev_state_enter(spa, SCL_ALL);
if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
@@ -5115,9 +5325,11 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
ASSERT(spa->spa_root != NULL);
break;
case ZPOOL_PROP_READONLY:
case ZPOOL_PROP_CACHEFILE:
/*
* 'cachefile' is also a non-persisitent property.
* 'readonly' and 'cachefile' are also non-persisitent
* properties.
*/
break;
default:
@@ -5249,6 +5461,8 @@ spa_sync(spa_t *spa, uint64_t txg)
dmu_tx_t *tx;
int error;
VERIFY(spa_writeable(spa));
/*
* Lock out configuration changes.
*/
@@ -5467,7 +5681,8 @@ spa_sync_allpools(void)
spa_t *spa = NULL;
mutex_enter(&spa_namespace_lock);
while ((spa = spa_next(spa)) != NULL) {
if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
if (spa_state(spa) != POOL_STATE_ACTIVE ||
!spa_writeable(spa) || spa_suspended(spa))
continue;
spa_open_ref(spa, FTAG);
mutex_exit(&spa_namespace_lock);
@@ -5547,6 +5762,8 @@ spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
void
spa_upgrade(spa_t *spa, uint64_t version)
{
ASSERT(spa_writeable(spa));
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
/*