mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 10:54:35 +03:00
Rebase master to b105
This commit is contained in:
+259
-109
@@ -316,8 +316,10 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||
mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
space_map_create(&vd->vdev_dtl_map, 0, -1ULL, 0, &vd->vdev_dtl_lock);
|
||||
space_map_create(&vd->vdev_dtl_scrub, 0, -1ULL, 0, &vd->vdev_dtl_lock);
|
||||
for (int t = 0; t < DTL_TYPES; t++) {
|
||||
space_map_create(&vd->vdev_dtl[t], 0, -1ULL, 0,
|
||||
&vd->vdev_dtl_lock);
|
||||
}
|
||||
txg_list_create(&vd->vdev_ms_list,
|
||||
offsetof(struct metaslab, ms_txg_node));
|
||||
txg_list_create(&vd->vdev_dtl_list,
|
||||
@@ -474,7 +476,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
||||
(alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_L2CACHE)) {
|
||||
if (alloctype == VDEV_ALLOC_LOAD) {
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL,
|
||||
&vd->vdev_dtl.smo_object);
|
||||
&vd->vdev_dtl_smo.smo_object);
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_UNSPARE,
|
||||
&vd->vdev_unspare);
|
||||
}
|
||||
@@ -566,12 +568,14 @@ vdev_free(vdev_t *vd)
|
||||
|
||||
txg_list_destroy(&vd->vdev_ms_list);
|
||||
txg_list_destroy(&vd->vdev_dtl_list);
|
||||
|
||||
mutex_enter(&vd->vdev_dtl_lock);
|
||||
space_map_unload(&vd->vdev_dtl_map);
|
||||
space_map_destroy(&vd->vdev_dtl_map);
|
||||
space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL);
|
||||
space_map_destroy(&vd->vdev_dtl_scrub);
|
||||
for (int t = 0; t < DTL_TYPES; t++) {
|
||||
space_map_unload(&vd->vdev_dtl[t]);
|
||||
space_map_destroy(&vd->vdev_dtl[t]);
|
||||
}
|
||||
mutex_exit(&vd->vdev_dtl_lock);
|
||||
|
||||
mutex_destroy(&vd->vdev_dtl_lock);
|
||||
mutex_destroy(&vd->vdev_stat_lock);
|
||||
mutex_destroy(&vd->vdev_probe_lock);
|
||||
@@ -709,14 +713,18 @@ vdev_remove_parent(vdev_t *cvd)
|
||||
|
||||
vdev_remove_child(mvd, cvd);
|
||||
vdev_remove_child(pvd, mvd);
|
||||
|
||||
/*
|
||||
* If cvd will replace mvd as a top-level vdev, preserve mvd's guid.
|
||||
* Otherwise, we could have detached an offline device, and when we
|
||||
* go to import the pool we'll think we have two top-level vdevs,
|
||||
* instead of a different version of the same top-level vdev.
|
||||
*/
|
||||
if (mvd->vdev_top == mvd)
|
||||
cvd->vdev_guid = cvd->vdev_guid_sum = mvd->vdev_guid;
|
||||
if (mvd->vdev_top == mvd) {
|
||||
uint64_t guid_delta = mvd->vdev_guid - cvd->vdev_guid;
|
||||
cvd->vdev_guid += guid_delta;
|
||||
cvd->vdev_guid_sum += guid_delta;
|
||||
}
|
||||
cvd->vdev_id = mvd->vdev_id;
|
||||
vdev_add_child(pvd, cvd);
|
||||
vdev_top_update(cvd->vdev_top, cvd->vdev_top);
|
||||
@@ -815,6 +823,7 @@ typedef struct vdev_probe_stats {
|
||||
static void
|
||||
vdev_probe_done(zio_t *zio)
|
||||
{
|
||||
spa_t *spa = zio->io_spa;
|
||||
vdev_probe_stats_t *vps = zio->io_private;
|
||||
vdev_t *vd = vps->vps_vd;
|
||||
|
||||
@@ -822,7 +831,7 @@ vdev_probe_done(zio_t *zio)
|
||||
ASSERT(zio->io_vd == vd);
|
||||
if (zio->io_error == 0)
|
||||
vps->vps_readable = 1;
|
||||
if (zio->io_error == 0 && (spa_mode & FWRITE)) {
|
||||
if (zio->io_error == 0 && spa_writeable(spa)) {
|
||||
zio_nowait(zio_write_phys(vps->vps_root, vd,
|
||||
zio->io_offset, zio->io_size, zio->io_data,
|
||||
ZIO_CHECKSUM_OFF, vdev_probe_done, vps,
|
||||
@@ -843,12 +852,12 @@ vdev_probe_done(zio_t *zio)
|
||||
vd->vdev_cant_write |= !vps->vps_writeable;
|
||||
|
||||
if (vdev_readable(vd) &&
|
||||
(vdev_writeable(vd) || !(spa_mode & FWRITE))) {
|
||||
(vdev_writeable(vd) || !spa_writeable(spa))) {
|
||||
zio->io_error = 0;
|
||||
} else {
|
||||
ASSERT(zio->io_error != 0);
|
||||
zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
|
||||
zio->io_spa, vd, NULL, 0, 0);
|
||||
spa, vd, NULL, 0, 0);
|
||||
zio->io_error = ENXIO;
|
||||
}
|
||||
kmem_free(vps, sizeof (*vps));
|
||||
@@ -916,12 +925,15 @@ vdev_probe(vdev_t *vd, zio_t *pio)
|
||||
int
|
||||
vdev_open(vdev_t *vd)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
int error;
|
||||
int c;
|
||||
uint64_t osize = 0;
|
||||
uint64_t asize, psize;
|
||||
uint64_t ashift = 0;
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
|
||||
|
||||
ASSERT(vd->vdev_state == VDEV_STATE_CLOSED ||
|
||||
vd->vdev_state == VDEV_STATE_CANT_OPEN ||
|
||||
vd->vdev_state == VDEV_STATE_OFFLINE);
|
||||
@@ -1055,16 +1067,12 @@ vdev_open(vdev_t *vd)
|
||||
|
||||
/*
|
||||
* If a leaf vdev has a DTL, and seems healthy, then kick off a
|
||||
* resilver. But don't do this if we are doing a reopen for a
|
||||
* scrub, since this would just restart the scrub we are already
|
||||
* doing.
|
||||
* resilver. But don't do this if we are doing a reopen for a scrub,
|
||||
* since this would just restart the scrub we are already doing.
|
||||
*/
|
||||
if (vd->vdev_children == 0 && !vd->vdev_spa->spa_scrub_reopen) {
|
||||
mutex_enter(&vd->vdev_dtl_lock);
|
||||
if (vd->vdev_dtl_map.sm_space != 0 && vdev_writeable(vd))
|
||||
spa_async_request(vd->vdev_spa, SPA_ASYNC_RESILVER);
|
||||
mutex_exit(&vd->vdev_dtl_lock);
|
||||
}
|
||||
if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
|
||||
vdev_resilver_needed(vd, NULL, NULL))
|
||||
spa_async_request(spa, SPA_ASYNC_RESILVER);
|
||||
|
||||
return (0);
|
||||
}
|
||||
@@ -1165,6 +1173,10 @@ vdev_validate(vdev_t *vd)
|
||||
void
|
||||
vdev_close(vdev_t *vd)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
|
||||
|
||||
vd->vdev_ops->vdev_op_close(vd);
|
||||
|
||||
vdev_cache_purge(vd);
|
||||
@@ -1283,34 +1295,88 @@ vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg)
|
||||
(void) txg_list_add(&vd->vdev_spa->spa_vdev_txg_list, vd, txg);
|
||||
}
|
||||
|
||||
/*
|
||||
* DTLs.
|
||||
*
|
||||
* A vdev's DTL (dirty time log) is the set of transaction groups for which
|
||||
* the vdev has less than perfect replication. There are three kinds of DTL:
|
||||
*
|
||||
* DTL_MISSING: txgs for which the vdev has no valid copies of the data
|
||||
*
|
||||
* DTL_PARTIAL: txgs for which data is available, but not fully replicated
|
||||
*
|
||||
* DTL_SCRUB: the txgs that could not be repaired by the last scrub; upon
|
||||
* scrub completion, DTL_SCRUB replaces DTL_MISSING in the range of
|
||||
* txgs that was scrubbed.
|
||||
*
|
||||
* DTL_OUTAGE: txgs which cannot currently be read, whether due to
|
||||
* persistent errors or just some device being offline.
|
||||
* Unlike the other three, the DTL_OUTAGE map is not generally
|
||||
* maintained; it's only computed when needed, typically to
|
||||
* determine whether a device can be detached.
|
||||
*
|
||||
* For leaf vdevs, DTL_MISSING and DTL_PARTIAL are identical: the device
|
||||
* either has the data or it doesn't.
|
||||
*
|
||||
* For interior vdevs such as mirror and RAID-Z the picture is more complex.
|
||||
* A vdev's DTL_PARTIAL is the union of its children's DTL_PARTIALs, because
|
||||
* if any child is less than fully replicated, then so is its parent.
|
||||
* A vdev's DTL_MISSING is a modified union of its children's DTL_MISSINGs,
|
||||
* comprising only those txgs which appear in 'maxfaults' or more children;
|
||||
* those are the txgs we don't have enough replication to read. For example,
|
||||
* double-parity RAID-Z can tolerate up to two missing devices (maxfaults == 2);
|
||||
* thus, its DTL_MISSING consists of the set of txgs that appear in more than
|
||||
* two child DTL_MISSING maps.
|
||||
*
|
||||
* It should be clear from the above that to compute the DTLs and outage maps
|
||||
* for all vdevs, it suffices to know just the leaf vdevs' DTL_MISSING maps.
|
||||
* Therefore, that is all we keep on disk. When loading the pool, or after
|
||||
* a configuration change, we generate all other DTLs from first principles.
|
||||
*/
|
||||
void
|
||||
vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size)
|
||||
vdev_dtl_dirty(vdev_t *vd, vdev_dtl_type_t t, uint64_t txg, uint64_t size)
|
||||
{
|
||||
space_map_t *sm = &vd->vdev_dtl[t];
|
||||
|
||||
ASSERT(t < DTL_TYPES);
|
||||
ASSERT(vd != vd->vdev_spa->spa_root_vdev);
|
||||
|
||||
mutex_enter(sm->sm_lock);
|
||||
if (!space_map_contains(sm, txg, size))
|
||||
space_map_add(sm, txg, size);
|
||||
mutex_exit(sm->sm_lock);
|
||||
}
|
||||
|
||||
int
|
||||
vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size)
|
||||
boolean_t
|
||||
vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t t, uint64_t txg, uint64_t size)
|
||||
{
|
||||
int dirty;
|
||||
space_map_t *sm = &vd->vdev_dtl[t];
|
||||
boolean_t dirty = B_FALSE;
|
||||
|
||||
/*
|
||||
* Quick test without the lock -- covers the common case that
|
||||
* there are no dirty time segments.
|
||||
*/
|
||||
if (sm->sm_space == 0)
|
||||
return (0);
|
||||
ASSERT(t < DTL_TYPES);
|
||||
ASSERT(vd != vd->vdev_spa->spa_root_vdev);
|
||||
|
||||
mutex_enter(sm->sm_lock);
|
||||
dirty = space_map_contains(sm, txg, size);
|
||||
if (sm->sm_space != 0)
|
||||
dirty = space_map_contains(sm, txg, size);
|
||||
mutex_exit(sm->sm_lock);
|
||||
|
||||
return (dirty);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t t)
|
||||
{
|
||||
space_map_t *sm = &vd->vdev_dtl[t];
|
||||
boolean_t empty;
|
||||
|
||||
mutex_enter(sm->sm_lock);
|
||||
empty = (sm->sm_space == 0);
|
||||
mutex_exit(sm->sm_lock);
|
||||
|
||||
return (empty);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reassess DTLs after a config change or scrub completion.
|
||||
*/
|
||||
@@ -1318,11 +1384,19 @@ void
|
||||
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
int c;
|
||||
avl_tree_t reftree;
|
||||
int minref;
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
|
||||
ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
|
||||
|
||||
if (vd->vdev_children == 0) {
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_dtl_reassess(vd->vdev_child[c], txg,
|
||||
scrub_txg, scrub_done);
|
||||
|
||||
if (vd == spa->spa_root_vdev)
|
||||
return;
|
||||
|
||||
if (vd->vdev_ops->vdev_op_leaf) {
|
||||
mutex_enter(&vd->vdev_dtl_lock);
|
||||
if (scrub_txg != 0 &&
|
||||
(spa->spa_scrub_started || spa->spa_scrub_errors == 0)) {
|
||||
@@ -1333,12 +1407,38 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
|
||||
* will be valid, so excise the old region and
|
||||
* fold in the scrub dtl. Otherwise, leave the
|
||||
* dtl as-is if there was an error.
|
||||
*
|
||||
* There's little trick here: to excise the beginning
|
||||
* of the DTL_MISSING map, we put it into a reference
|
||||
* tree and then add a segment with refcnt -1 that
|
||||
* covers the range [0, scrub_txg). This means
|
||||
* that each txg in that range has refcnt -1 or 0.
|
||||
* We then add DTL_SCRUB with a refcnt of 2, so that
|
||||
* entries in the range [0, scrub_txg) will have a
|
||||
* positive refcnt -- either 1 or 2. We then convert
|
||||
* the reference tree into the new DTL_MISSING map.
|
||||
*/
|
||||
space_map_excise(&vd->vdev_dtl_map, 0, scrub_txg);
|
||||
space_map_union(&vd->vdev_dtl_map, &vd->vdev_dtl_scrub);
|
||||
space_map_ref_create(&reftree);
|
||||
space_map_ref_add_map(&reftree,
|
||||
&vd->vdev_dtl[DTL_MISSING], 1);
|
||||
space_map_ref_add_seg(&reftree, 0, scrub_txg, -1);
|
||||
space_map_ref_add_map(&reftree,
|
||||
&vd->vdev_dtl[DTL_SCRUB], 2);
|
||||
space_map_ref_generate_map(&reftree,
|
||||
&vd->vdev_dtl[DTL_MISSING], 1);
|
||||
space_map_ref_destroy(&reftree);
|
||||
}
|
||||
space_map_vacate(&vd->vdev_dtl[DTL_PARTIAL], NULL, NULL);
|
||||
space_map_walk(&vd->vdev_dtl[DTL_MISSING],
|
||||
space_map_add, &vd->vdev_dtl[DTL_PARTIAL]);
|
||||
if (scrub_done)
|
||||
space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL);
|
||||
space_map_vacate(&vd->vdev_dtl[DTL_SCRUB], NULL, NULL);
|
||||
space_map_vacate(&vd->vdev_dtl[DTL_OUTAGE], NULL, NULL);
|
||||
if (!vdev_readable(vd))
|
||||
space_map_add(&vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL);
|
||||
else
|
||||
space_map_walk(&vd->vdev_dtl[DTL_MISSING],
|
||||
space_map_add, &vd->vdev_dtl[DTL_OUTAGE]);
|
||||
mutex_exit(&vd->vdev_dtl_lock);
|
||||
|
||||
if (txg != 0)
|
||||
@@ -1346,35 +1446,34 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure the DTLs are always correct under the scrub lock.
|
||||
*/
|
||||
if (vd == spa->spa_root_vdev)
|
||||
mutex_enter(&spa->spa_scrub_lock);
|
||||
|
||||
mutex_enter(&vd->vdev_dtl_lock);
|
||||
space_map_vacate(&vd->vdev_dtl_map, NULL, NULL);
|
||||
space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL);
|
||||
mutex_exit(&vd->vdev_dtl_lock);
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++) {
|
||||
vdev_t *cvd = vd->vdev_child[c];
|
||||
vdev_dtl_reassess(cvd, txg, scrub_txg, scrub_done);
|
||||
mutex_enter(&vd->vdev_dtl_lock);
|
||||
space_map_union(&vd->vdev_dtl_map, &cvd->vdev_dtl_map);
|
||||
space_map_union(&vd->vdev_dtl_scrub, &cvd->vdev_dtl_scrub);
|
||||
mutex_exit(&vd->vdev_dtl_lock);
|
||||
for (int t = 0; t < DTL_TYPES; t++) {
|
||||
if (t == DTL_SCRUB)
|
||||
continue; /* leaf vdevs only */
|
||||
if (t == DTL_PARTIAL)
|
||||
minref = 1; /* i.e. non-zero */
|
||||
else if (vd->vdev_nparity != 0)
|
||||
minref = vd->vdev_nparity + 1; /* RAID-Z */
|
||||
else
|
||||
minref = vd->vdev_children; /* any kind of mirror */
|
||||
space_map_ref_create(&reftree);
|
||||
for (int c = 0; c < vd->vdev_children; c++) {
|
||||
vdev_t *cvd = vd->vdev_child[c];
|
||||
mutex_enter(&cvd->vdev_dtl_lock);
|
||||
space_map_ref_add_map(&reftree, &cvd->vdev_dtl[t], 1);
|
||||
mutex_exit(&cvd->vdev_dtl_lock);
|
||||
}
|
||||
space_map_ref_generate_map(&reftree, &vd->vdev_dtl[t], minref);
|
||||
space_map_ref_destroy(&reftree);
|
||||
}
|
||||
|
||||
if (vd == spa->spa_root_vdev)
|
||||
mutex_exit(&spa->spa_scrub_lock);
|
||||
mutex_exit(&vd->vdev_dtl_lock);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_dtl_load(vdev_t *vd)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
space_map_obj_t *smo = &vd->vdev_dtl;
|
||||
space_map_obj_t *smo = &vd->vdev_dtl_smo;
|
||||
objset_t *mos = spa->spa_meta_objset;
|
||||
dmu_buf_t *db;
|
||||
int error;
|
||||
@@ -1392,7 +1491,8 @@ vdev_dtl_load(vdev_t *vd)
|
||||
dmu_buf_rele(db, FTAG);
|
||||
|
||||
mutex_enter(&vd->vdev_dtl_lock);
|
||||
error = space_map_load(&vd->vdev_dtl_map, NULL, SM_ALLOC, smo, mos);
|
||||
error = space_map_load(&vd->vdev_dtl[DTL_MISSING],
|
||||
NULL, SM_ALLOC, smo, mos);
|
||||
mutex_exit(&vd->vdev_dtl_lock);
|
||||
|
||||
return (error);
|
||||
@@ -1402,8 +1502,8 @@ void
|
||||
vdev_dtl_sync(vdev_t *vd, uint64_t txg)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
space_map_obj_t *smo = &vd->vdev_dtl;
|
||||
space_map_t *sm = &vd->vdev_dtl_map;
|
||||
space_map_obj_t *smo = &vd->vdev_dtl_smo;
|
||||
space_map_t *sm = &vd->vdev_dtl[DTL_MISSING];
|
||||
objset_t *mos = spa->spa_meta_objset;
|
||||
space_map_t smsync;
|
||||
kmutex_t smlock;
|
||||
@@ -1460,6 +1560,37 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
|
||||
dmu_tx_commit(tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine whether the specified vdev can be offlined/detached/removed
|
||||
* without losing data.
|
||||
*/
|
||||
boolean_t
|
||||
vdev_dtl_required(vdev_t *vd)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
vdev_t *tvd = vd->vdev_top;
|
||||
uint8_t cant_read = vd->vdev_cant_read;
|
||||
boolean_t required;
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
|
||||
|
||||
if (vd == spa->spa_root_vdev || vd == tvd)
|
||||
return (B_TRUE);
|
||||
|
||||
/*
|
||||
* Temporarily mark the device as unreadable, and then determine
|
||||
* whether this results in any DTL outages in the top-level vdev.
|
||||
* If not, we can safely offline/detach/remove the device.
|
||||
*/
|
||||
vd->vdev_cant_read = B_TRUE;
|
||||
vdev_dtl_reassess(tvd, 0, 0, B_FALSE);
|
||||
required = !vdev_dtl_empty(tvd, DTL_OUTAGE);
|
||||
vd->vdev_cant_read = cant_read;
|
||||
vdev_dtl_reassess(tvd, 0, 0, B_FALSE);
|
||||
|
||||
return (required);
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine if resilver is needed, and if so the txg range.
|
||||
*/
|
||||
@@ -1472,19 +1603,19 @@ vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp)
|
||||
|
||||
if (vd->vdev_children == 0) {
|
||||
mutex_enter(&vd->vdev_dtl_lock);
|
||||
if (vd->vdev_dtl_map.sm_space != 0 && vdev_writeable(vd)) {
|
||||
if (vd->vdev_dtl[DTL_MISSING].sm_space != 0 &&
|
||||
vdev_writeable(vd)) {
|
||||
space_seg_t *ss;
|
||||
|
||||
ss = avl_first(&vd->vdev_dtl_map.sm_root);
|
||||
ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root);
|
||||
thismin = ss->ss_start - 1;
|
||||
ss = avl_last(&vd->vdev_dtl_map.sm_root);
|
||||
ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root);
|
||||
thismax = ss->ss_end;
|
||||
needed = B_TRUE;
|
||||
}
|
||||
mutex_exit(&vd->vdev_dtl_lock);
|
||||
} else {
|
||||
int c;
|
||||
for (c = 0; c < vd->vdev_children; c++) {
|
||||
for (int c = 0; c < vd->vdev_children; c++) {
|
||||
vdev_t *cvd = vd->vdev_child[c];
|
||||
uint64_t cmin, cmax;
|
||||
|
||||
@@ -1506,12 +1637,10 @@ vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp)
|
||||
void
|
||||
vdev_load(vdev_t *vd)
|
||||
{
|
||||
int c;
|
||||
|
||||
/*
|
||||
* Recursively load all children.
|
||||
*/
|
||||
for (c = 0; c < vd->vdev_children; c++)
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_load(vd->vdev_child[c]);
|
||||
|
||||
/*
|
||||
@@ -1731,11 +1860,7 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
|
||||
vd->vdev_parent->vdev_child[0] == vd)
|
||||
vd->vdev_unspare = B_TRUE;
|
||||
|
||||
(void) spa_vdev_state_exit(spa, vd, 0);
|
||||
|
||||
VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);
|
||||
|
||||
return (0);
|
||||
return (spa_vdev_state_exit(spa, vd, 0));
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1756,13 +1881,10 @@ vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
|
||||
*/
|
||||
if (!vd->vdev_offline) {
|
||||
/*
|
||||
* If this device's top-level vdev has a non-empty DTL,
|
||||
* don't allow the device to be offlined.
|
||||
*
|
||||
* XXX -- make this more precise by allowing the offline
|
||||
* as long as the remaining devices don't have any DTL holes.
|
||||
* If this device has the only valid copy of some data,
|
||||
* don't allow it to be offlined.
|
||||
*/
|
||||
if (vd->vdev_top->vdev_dtl_map.sm_space != 0)
|
||||
if (vd->vdev_aux == NULL && vdev_dtl_required(vd))
|
||||
return (spa_vdev_state_exit(spa, NULL, EBUSY));
|
||||
|
||||
/*
|
||||
@@ -1772,7 +1894,7 @@ vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
|
||||
*/
|
||||
vd->vdev_offline = B_TRUE;
|
||||
vdev_reopen(vd->vdev_top);
|
||||
if (vdev_is_dead(vd->vdev_top) && vd->vdev_aux == NULL) {
|
||||
if (vd->vdev_aux == NULL && vdev_is_dead(vd->vdev_top)) {
|
||||
vd->vdev_offline = B_FALSE;
|
||||
vdev_reopen(vd->vdev_top);
|
||||
return (spa_vdev_state_exit(spa, NULL, EBUSY));
|
||||
@@ -1852,13 +1974,17 @@ vdev_writeable(vdev_t *vd)
|
||||
boolean_t
|
||||
vdev_allocatable(vdev_t *vd)
|
||||
{
|
||||
uint64_t state = vd->vdev_state;
|
||||
|
||||
/*
|
||||
* We currently allow allocations from vdevs which maybe in the
|
||||
* We currently allow allocations from vdevs which may be in the
|
||||
* process of reopening (i.e. VDEV_STATE_CLOSED). If the device
|
||||
* fails to reopen then we'll catch it later when we're holding
|
||||
* the proper locks.
|
||||
* the proper locks. Note that we have to get the vdev state
|
||||
* in a local variable because although it changes atomically,
|
||||
* we're asking two separate questions about it.
|
||||
*/
|
||||
return (!(vdev_is_dead(vd) && vd->vdev_state != VDEV_STATE_CLOSED) &&
|
||||
return (!(state < VDEV_STATE_DEGRADED && state != VDEV_STATE_CLOSED) &&
|
||||
!vd->vdev_cant_write);
|
||||
}
|
||||
|
||||
@@ -1928,7 +2054,8 @@ vdev_clear_stats(vdev_t *vd)
|
||||
void
|
||||
vdev_stat_update(zio_t *zio, uint64_t psize)
|
||||
{
|
||||
vdev_t *rvd = zio->io_spa->spa_root_vdev;
|
||||
spa_t *spa = zio->io_spa;
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
vdev_t *vd = zio->io_vd ? zio->io_vd : rvd;
|
||||
vdev_t *pvd;
|
||||
uint64_t txg = zio->io_txg;
|
||||
@@ -1961,21 +2088,23 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
|
||||
return;
|
||||
|
||||
ASSERT(vd == zio->io_vd);
|
||||
if (!(flags & ZIO_FLAG_IO_BYPASS)) {
|
||||
mutex_enter(&vd->vdev_stat_lock);
|
||||
vs->vs_ops[type]++;
|
||||
vs->vs_bytes[type] += psize;
|
||||
mutex_exit(&vd->vdev_stat_lock);
|
||||
}
|
||||
|
||||
if (flags & ZIO_FLAG_IO_BYPASS)
|
||||
return;
|
||||
|
||||
mutex_enter(&vd->vdev_stat_lock);
|
||||
|
||||
if (flags & ZIO_FLAG_IO_REPAIR) {
|
||||
ASSERT(zio->io_delegate_list == NULL);
|
||||
mutex_enter(&vd->vdev_stat_lock);
|
||||
if (flags & ZIO_FLAG_SCRUB_THREAD)
|
||||
vs->vs_scrub_repaired += psize;
|
||||
else
|
||||
if (flags & ZIO_FLAG_SELF_HEAL)
|
||||
vs->vs_self_healed += psize;
|
||||
mutex_exit(&vd->vdev_stat_lock);
|
||||
}
|
||||
|
||||
vs->vs_ops[type]++;
|
||||
vs->vs_bytes[type] += psize;
|
||||
|
||||
mutex_exit(&vd->vdev_stat_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1993,19 +2122,39 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
|
||||
vs->vs_write_errors++;
|
||||
mutex_exit(&vd->vdev_stat_lock);
|
||||
|
||||
if (type == ZIO_TYPE_WRITE && txg != 0 && vd->vdev_children == 0) {
|
||||
if (flags & ZIO_FLAG_SCRUB_THREAD) {
|
||||
ASSERT(flags & ZIO_FLAG_IO_REPAIR);
|
||||
for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
|
||||
vdev_dtl_dirty(&pvd->vdev_dtl_scrub, txg, 1);
|
||||
}
|
||||
if (!(flags & ZIO_FLAG_IO_REPAIR)) {
|
||||
if (vdev_dtl_contains(&vd->vdev_dtl_map, txg, 1))
|
||||
if (type == ZIO_TYPE_WRITE && txg != 0 &&
|
||||
(!(flags & ZIO_FLAG_IO_REPAIR) ||
|
||||
(flags & ZIO_FLAG_SCRUB_THREAD))) {
|
||||
/*
|
||||
* This is either a normal write (not a repair), or it's a
|
||||
* repair induced by the scrub thread. In the normal case,
|
||||
* we commit the DTL change in the same txg as the block
|
||||
* was born. In the scrub-induced repair case, we know that
|
||||
* scrubs run in first-pass syncing context, so we commit
|
||||
* the DTL change in spa->spa_syncing_txg.
|
||||
*
|
||||
* We currently do not make DTL entries for failed spontaneous
|
||||
* self-healing writes triggered by normal (non-scrubbing)
|
||||
* reads, because we have no transactional context in which to
|
||||
* do so -- and it's not clear that it'd be desirable anyway.
|
||||
*/
|
||||
if (vd->vdev_ops->vdev_op_leaf) {
|
||||
uint64_t commit_txg = txg;
|
||||
if (flags & ZIO_FLAG_SCRUB_THREAD) {
|
||||
ASSERT(flags & ZIO_FLAG_IO_REPAIR);
|
||||
ASSERT(spa_sync_pass(spa) == 1);
|
||||
vdev_dtl_dirty(vd, DTL_SCRUB, txg, 1);
|
||||
commit_txg = spa->spa_syncing_txg;
|
||||
}
|
||||
ASSERT(commit_txg >= spa->spa_syncing_txg);
|
||||
if (vdev_dtl_contains(vd, DTL_MISSING, txg, 1))
|
||||
return;
|
||||
vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg);
|
||||
for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
|
||||
vdev_dtl_dirty(&pvd->vdev_dtl_map, txg, 1);
|
||||
for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent)
|
||||
vdev_dtl_dirty(pvd, DTL_PARTIAL, txg, 1);
|
||||
vdev_dirty(vd->vdev_top, VDD_DTL, vd, commit_txg);
|
||||
}
|
||||
if (vd != rvd)
|
||||
vdev_dtl_dirty(vd, DTL_MISSING, txg, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2218,7 +2367,8 @@ vdev_state_clean(vdev_t *vd)
|
||||
void
|
||||
vdev_propagate_state(vdev_t *vd)
|
||||
{
|
||||
vdev_t *rvd = vd->vdev_spa->spa_root_vdev;
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
int degraded = 0, faulted = 0;
|
||||
int corrupted = 0;
|
||||
int c;
|
||||
@@ -2229,7 +2379,7 @@ vdev_propagate_state(vdev_t *vd)
|
||||
child = vd->vdev_child[c];
|
||||
|
||||
if (!vdev_readable(child) ||
|
||||
(!vdev_writeable(child) && (spa_mode & FWRITE))) {
|
||||
(!vdev_writeable(child) && spa_writeable(spa))) {
|
||||
/*
|
||||
* Root special: if there is a top-level log
|
||||
* device, treat the root vdev as if it were
|
||||
|
||||
Reference in New Issue
Block a user