mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-11-17 01:51:00 +03:00
3145 single-copy arc 3212 ztest: race condition between vdev_online() and spa_vdev_remove() Reviewed by: Matt Ahrens <matthew.ahrens@delphix.com> Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: Eric Schrock <eric.schrock@delphix.com> Reviewed by: Justin T. Gibbs <gibbs@scsiguy.com> Approved by: Eric Schrock <eric.schrock@delphix.com> References: illumos-gate/commit/9253d63df408bb48584e0b1abfcc24ef2472382e illumos changeset: 13840:97fd5cdf328a https://www.illumos.org/issues/3145 https://www.illumos.org/issues/3212 Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #989 Closes #1137
This commit is contained in:
parent
753c38392d
commit
1eb5bfa3dc
@ -4965,7 +4965,18 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
|
|||||||
if (islog)
|
if (islog)
|
||||||
(void) rw_exit(&ztest_name_lock);
|
(void) rw_exit(&ztest_name_lock);
|
||||||
} else {
|
} else {
|
||||||
|
/*
|
||||||
|
* Ideally we would like to be able to randomly
|
||||||
|
* call vdev_[on|off]line without holding locks
|
||||||
|
* to force unpredictable failures but the side
|
||||||
|
* effects of vdev_[on|off]line prevent us from
|
||||||
|
* doing so. We grab the ztest_vdev_lock here to
|
||||||
|
* prevent a race between injection testing and
|
||||||
|
* aux_vdev removal.
|
||||||
|
*/
|
||||||
|
mutex_enter(&ztest_vdev_lock);
|
||||||
(void) vdev_online(spa, guid0, 0, NULL);
|
(void) vdev_online(spa, guid0, 0, NULL);
|
||||||
|
mutex_exit(&ztest_vdev_lock);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,6 +109,7 @@ int arc_released(arc_buf_t *buf);
|
|||||||
int arc_has_callback(arc_buf_t *buf);
|
int arc_has_callback(arc_buf_t *buf);
|
||||||
void arc_buf_freeze(arc_buf_t *buf);
|
void arc_buf_freeze(arc_buf_t *buf);
|
||||||
void arc_buf_thaw(arc_buf_t *buf);
|
void arc_buf_thaw(arc_buf_t *buf);
|
||||||
|
boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
|
||||||
#ifdef ZFS_DEBUG
|
#ifdef ZFS_DEBUG
|
||||||
int arc_referenced(arc_buf_t *buf);
|
int arc_referenced(arc_buf_t *buf);
|
||||||
#endif
|
#endif
|
||||||
|
@ -189,6 +189,7 @@ unsigned long zfs_arc_meta_limit = 0;
|
|||||||
int zfs_arc_grow_retry = 0;
|
int zfs_arc_grow_retry = 0;
|
||||||
int zfs_arc_shrink_shift = 0;
|
int zfs_arc_shrink_shift = 0;
|
||||||
int zfs_arc_p_min_shift = 0;
|
int zfs_arc_p_min_shift = 0;
|
||||||
|
int zfs_disable_dup_eviction = 0;
|
||||||
int zfs_arc_meta_prune = 0;
|
int zfs_arc_meta_prune = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -307,6 +308,9 @@ typedef struct arc_stats {
|
|||||||
kstat_named_t arcstat_l2_size;
|
kstat_named_t arcstat_l2_size;
|
||||||
kstat_named_t arcstat_l2_hdr_size;
|
kstat_named_t arcstat_l2_hdr_size;
|
||||||
kstat_named_t arcstat_memory_throttle_count;
|
kstat_named_t arcstat_memory_throttle_count;
|
||||||
|
kstat_named_t arcstat_duplicate_buffers;
|
||||||
|
kstat_named_t arcstat_duplicate_buffers_size;
|
||||||
|
kstat_named_t arcstat_duplicate_reads;
|
||||||
kstat_named_t arcstat_memory_direct_count;
|
kstat_named_t arcstat_memory_direct_count;
|
||||||
kstat_named_t arcstat_memory_indirect_count;
|
kstat_named_t arcstat_memory_indirect_count;
|
||||||
kstat_named_t arcstat_no_grow;
|
kstat_named_t arcstat_no_grow;
|
||||||
@ -387,6 +391,9 @@ static arc_stats_t arc_stats = {
|
|||||||
{ "l2_size", KSTAT_DATA_UINT64 },
|
{ "l2_size", KSTAT_DATA_UINT64 },
|
||||||
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
|
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
|
||||||
{ "memory_throttle_count", KSTAT_DATA_UINT64 },
|
{ "memory_throttle_count", KSTAT_DATA_UINT64 },
|
||||||
|
{ "duplicate_buffers", KSTAT_DATA_UINT64 },
|
||||||
|
{ "duplicate_buffers_size", KSTAT_DATA_UINT64 },
|
||||||
|
{ "duplicate_reads", KSTAT_DATA_UINT64 },
|
||||||
{ "memory_direct_count", KSTAT_DATA_UINT64 },
|
{ "memory_direct_count", KSTAT_DATA_UINT64 },
|
||||||
{ "memory_indirect_count", KSTAT_DATA_UINT64 },
|
{ "memory_indirect_count", KSTAT_DATA_UINT64 },
|
||||||
{ "arc_no_grow", KSTAT_DATA_UINT64 },
|
{ "arc_no_grow", KSTAT_DATA_UINT64 },
|
||||||
@ -1369,6 +1376,17 @@ arc_buf_clone(arc_buf_t *from)
|
|||||||
hdr->b_buf = buf;
|
hdr->b_buf = buf;
|
||||||
arc_get_data_buf(buf);
|
arc_get_data_buf(buf);
|
||||||
bcopy(from->b_data, buf->b_data, size);
|
bcopy(from->b_data, buf->b_data, size);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This buffer already exists in the arc so create a duplicate
|
||||||
|
* copy for the caller. If the buffer is associated with user data
|
||||||
|
* then track the size and number of duplicates. These stats will be
|
||||||
|
* updated as duplicate buffers are created and destroyed.
|
||||||
|
*/
|
||||||
|
if (hdr->b_type == ARC_BUFC_DATA) {
|
||||||
|
ARCSTAT_BUMP(arcstat_duplicate_buffers);
|
||||||
|
ARCSTAT_INCR(arcstat_duplicate_buffers_size, size);
|
||||||
|
}
|
||||||
hdr->b_datacnt += 1;
|
hdr->b_datacnt += 1;
|
||||||
return (buf);
|
return (buf);
|
||||||
}
|
}
|
||||||
@ -1467,6 +1485,16 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
|
|||||||
ASSERT3U(state->arcs_size, >=, size);
|
ASSERT3U(state->arcs_size, >=, size);
|
||||||
atomic_add_64(&state->arcs_size, -size);
|
atomic_add_64(&state->arcs_size, -size);
|
||||||
buf->b_data = NULL;
|
buf->b_data = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we're destroying a duplicate buffer make sure
|
||||||
|
* that the appropriate statistics are updated.
|
||||||
|
*/
|
||||||
|
if (buf->b_hdr->b_datacnt > 1 &&
|
||||||
|
buf->b_hdr->b_type == ARC_BUFC_DATA) {
|
||||||
|
ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
|
||||||
|
ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size);
|
||||||
|
}
|
||||||
ASSERT(buf->b_hdr->b_datacnt > 0);
|
ASSERT(buf->b_hdr->b_datacnt > 0);
|
||||||
buf->b_hdr->b_datacnt -= 1;
|
buf->b_hdr->b_datacnt -= 1;
|
||||||
}
|
}
|
||||||
@ -1651,6 +1679,48 @@ arc_buf_size(arc_buf_t *buf)
|
|||||||
return (buf->b_hdr->b_size);
|
return (buf->b_hdr->b_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from the DMU to determine if the current buffer should be
|
||||||
|
* evicted. In order to ensure proper locking, the eviction must be initiated
|
||||||
|
* from the DMU. Return true if the buffer is associated with user data and
|
||||||
|
* duplicate buffers still exist.
|
||||||
|
*/
|
||||||
|
boolean_t
|
||||||
|
arc_buf_eviction_needed(arc_buf_t *buf)
|
||||||
|
{
|
||||||
|
arc_buf_hdr_t *hdr;
|
||||||
|
boolean_t evict_needed = B_FALSE;
|
||||||
|
|
||||||
|
if (zfs_disable_dup_eviction)
|
||||||
|
return (B_FALSE);
|
||||||
|
|
||||||
|
mutex_enter(&buf->b_evict_lock);
|
||||||
|
hdr = buf->b_hdr;
|
||||||
|
if (hdr == NULL) {
|
||||||
|
/*
|
||||||
|
* We are in arc_do_user_evicts(); let that function
|
||||||
|
* perform the eviction.
|
||||||
|
*/
|
||||||
|
ASSERT(buf->b_data == NULL);
|
||||||
|
mutex_exit(&buf->b_evict_lock);
|
||||||
|
return (B_FALSE);
|
||||||
|
} else if (buf->b_data == NULL) {
|
||||||
|
/*
|
||||||
|
* We have already been added to the arc eviction list;
|
||||||
|
* recommend eviction.
|
||||||
|
*/
|
||||||
|
ASSERT3P(hdr, ==, &arc_eviction_hdr);
|
||||||
|
mutex_exit(&buf->b_evict_lock);
|
||||||
|
return (B_TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA)
|
||||||
|
evict_needed = B_TRUE;
|
||||||
|
|
||||||
|
mutex_exit(&buf->b_evict_lock);
|
||||||
|
return (evict_needed);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Evict buffers from list until we've removed the specified number of
|
* Evict buffers from list until we've removed the specified number of
|
||||||
* bytes. Move the removed buffers to the appropriate evict state.
|
* bytes. Move the removed buffers to the appropriate evict state.
|
||||||
@ -2753,8 +2823,10 @@ arc_read_done(zio_t *zio)
|
|||||||
abuf = buf;
|
abuf = buf;
|
||||||
for (acb = callback_list; acb; acb = acb->acb_next) {
|
for (acb = callback_list; acb; acb = acb->acb_next) {
|
||||||
if (acb->acb_done) {
|
if (acb->acb_done) {
|
||||||
if (abuf == NULL)
|
if (abuf == NULL) {
|
||||||
|
ARCSTAT_BUMP(arcstat_duplicate_reads);
|
||||||
abuf = arc_buf_clone(buf);
|
abuf = arc_buf_clone(buf);
|
||||||
|
}
|
||||||
acb->acb_buf = abuf;
|
acb->acb_buf = abuf;
|
||||||
abuf = NULL;
|
abuf = NULL;
|
||||||
}
|
}
|
||||||
@ -3324,6 +3396,16 @@ arc_release(arc_buf_t *buf, void *tag)
|
|||||||
ASSERT3U(*size, >=, hdr->b_size);
|
ASSERT3U(*size, >=, hdr->b_size);
|
||||||
atomic_add_64(size, -hdr->b_size);
|
atomic_add_64(size, -hdr->b_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We're releasing a duplicate user data buffer, update
|
||||||
|
* our statistics accordingly.
|
||||||
|
*/
|
||||||
|
if (hdr->b_type == ARC_BUFC_DATA) {
|
||||||
|
ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
|
||||||
|
ARCSTAT_INCR(arcstat_duplicate_buffers_size,
|
||||||
|
-hdr->b_size);
|
||||||
|
}
|
||||||
hdr->b_datacnt -= 1;
|
hdr->b_datacnt -= 1;
|
||||||
arc_cksum_verify(buf);
|
arc_cksum_verify(buf);
|
||||||
|
|
||||||
|
@ -2189,7 +2189,24 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
|
|||||||
dbuf_evict(db);
|
dbuf_evict(db);
|
||||||
} else {
|
} else {
|
||||||
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
|
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
|
||||||
if (!DBUF_IS_CACHEABLE(db))
|
|
||||||
|
/*
|
||||||
|
* A dbuf will be eligible for eviction if either the
|
||||||
|
* 'primarycache' property is set or a duplicate
|
||||||
|
* copy of this buffer is already cached in the arc.
|
||||||
|
*
|
||||||
|
* In the case of the 'primarycache' a buffer
|
||||||
|
* is considered for eviction if it matches the
|
||||||
|
* criteria set in the property.
|
||||||
|
*
|
||||||
|
* To decide if our buffer is considered a
|
||||||
|
* duplicate, we must call into the arc to determine
|
||||||
|
* if multiple buffers are referencing the same
|
||||||
|
* block on-disk. If so, then we simply evict
|
||||||
|
* ourselves.
|
||||||
|
*/
|
||||||
|
if (!DBUF_IS_CACHEABLE(db) ||
|
||||||
|
arc_buf_eviction_needed(db->db_buf))
|
||||||
dbuf_clear(db);
|
dbuf_clear(db);
|
||||||
else
|
else
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
|
Loading…
Reference in New Issue
Block a user