Re-embed multilist_t storage

This commit partially reverts changes to multilists in PR 7968
(multi-threaded spa_sync()) and adds some cache line alignments to
separate read-only multilists and heavily modified refcounts onto different
cache lines.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-by: iXsystems, Inc.
Closes #12158
This commit is contained in:
Alexander Motin 2021-06-10 12:42:31 -04:00 committed by GitHub
parent eec5ba113e
commit ffdf019cb3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 99 additions and 104 deletions

View File

@ -74,20 +74,20 @@ typedef struct arc_state {
/* /*
* list of evictable buffers * list of evictable buffers
*/ */
multilist_t *arcs_list[ARC_BUFC_NUMTYPES]; multilist_t arcs_list[ARC_BUFC_NUMTYPES];
/*
* supports the "dbufs" kstat
*/
arc_state_type_t arcs_state;
/* /*
* total amount of evictable data in this state * total amount of evictable data in this state
*/ */
zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES]; zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES] ____cacheline_aligned;
/* /*
* total amount of data in this state; this includes: evictable, * total amount of data in this state; this includes: evictable,
* non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA. * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
*/ */
zfs_refcount_t arcs_size; zfs_refcount_t arcs_size;
/*
* supports the "dbufs" kstat
*/
arc_state_type_t arcs_state;
} arc_state_t; } arc_state_t;
typedef struct arc_callback arc_callback_t; typedef struct arc_callback arc_callback_t;

View File

@ -153,7 +153,7 @@ struct objset {
/* no lock needed: */ /* no lock needed: */
struct dmu_tx *os_synctx; /* XXX sketchy */ struct dmu_tx *os_synctx; /* XXX sketchy */
zil_header_t os_zil_header; zil_header_t os_zil_header;
multilist_t *os_synced_dnodes; multilist_t os_synced_dnodes;
uint64_t os_flags; uint64_t os_flags;
uint64_t os_freed_dnodes; uint64_t os_freed_dnodes;
boolean_t os_rescan_dnodes; boolean_t os_rescan_dnodes;
@ -172,7 +172,7 @@ struct objset {
/* Protected by os_lock */ /* Protected by os_lock */
kmutex_t os_lock; kmutex_t os_lock;
multilist_t *os_dirty_dnodes[TXG_SIZE]; multilist_t os_dirty_dnodes[TXG_SIZE];
list_t os_dnodes; list_t os_dnodes;
list_t os_downgraded_dbufs; list_t os_downgraded_dbufs;

View File

@ -206,7 +206,7 @@ struct metaslab_class {
* List of all loaded metaslabs in the class, sorted in order of most * List of all loaded metaslabs in the class, sorted in order of most
* recent use. * recent use.
*/ */
multilist_t *mc_metaslab_txg_list; multilist_t mc_metaslab_txg_list;
metaslab_class_allocator_t mc_allocator[]; metaslab_class_allocator_t mc_allocator[];
}; };

View File

@ -71,8 +71,9 @@ struct multilist {
multilist_sublist_index_func_t *ml_index_func; multilist_sublist_index_func_t *ml_index_func;
}; };
void multilist_create(multilist_t *, size_t, size_t,
multilist_sublist_index_func_t *);
void multilist_destroy(multilist_t *); void multilist_destroy(multilist_t *);
multilist_t *multilist_create(size_t, size_t, multilist_sublist_index_func_t *);
void multilist_insert(multilist_t *, void *); void multilist_insert(multilist_t *, void *);
void multilist_remove(multilist_t *, void *); void multilist_remove(multilist_t *, void *);

View File

@ -2327,7 +2327,7 @@ add_reference(arc_buf_hdr_t *hdr, void *tag)
(state != arc_anon)) { (state != arc_anon)) {
/* We don't use the L2-only state list. */ /* We don't use the L2-only state list. */
if (state != arc_l2c_only) { if (state != arc_l2c_only) {
multilist_remove(state->arcs_list[arc_buf_type(hdr)], multilist_remove(&state->arcs_list[arc_buf_type(hdr)],
hdr); hdr);
arc_evictable_space_decrement(hdr, state); arc_evictable_space_decrement(hdr, state);
} }
@ -2361,7 +2361,7 @@ remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
*/ */
if (((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) && if (((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
(state != arc_anon)) { (state != arc_anon)) {
multilist_insert(state->arcs_list[arc_buf_type(hdr)], hdr); multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr);
ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0); ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
arc_evictable_space_increment(hdr, state); arc_evictable_space_increment(hdr, state);
} }
@ -2464,7 +2464,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
if (refcnt == 0) { if (refcnt == 0) {
if (old_state != arc_anon && old_state != arc_l2c_only) { if (old_state != arc_anon && old_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr));
multilist_remove(old_state->arcs_list[buftype], hdr); multilist_remove(&old_state->arcs_list[buftype], hdr);
if (GHOST_STATE(old_state)) { if (GHOST_STATE(old_state)) {
ASSERT0(bufcnt); ASSERT0(bufcnt);
@ -2481,7 +2481,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
* beforehand. * beforehand.
*/ */
ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr));
multilist_insert(new_state->arcs_list[buftype], hdr); multilist_insert(&new_state->arcs_list[buftype], hdr);
if (GHOST_STATE(new_state)) { if (GHOST_STATE(new_state)) {
ASSERT0(bufcnt); ASSERT0(bufcnt);
@ -2633,8 +2633,8 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
* L2 headers should never be on the L2 state list since they don't * L2 headers should never be on the L2 state list since they don't
* have L1 headers allocated. * have L1 headers allocated.
*/ */
ASSERT(multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_DATA]) && ASSERT(multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_METADATA])); multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
} }
void void
@ -4200,7 +4200,7 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
arc_buf_contents_t type) arc_buf_contents_t type)
{ {
uint64_t total_evicted = 0; uint64_t total_evicted = 0;
multilist_t *ml = state->arcs_list[type]; multilist_t *ml = &state->arcs_list[type];
int num_sublists; int num_sublists;
arc_buf_hdr_t **markers; arc_buf_hdr_t **markers;
@ -4534,8 +4534,8 @@ arc_evict_meta(uint64_t meta_used)
static arc_buf_contents_t static arc_buf_contents_t
arc_evict_type(arc_state_t *state) arc_evict_type(arc_state_t *state)
{ {
multilist_t *data_ml = state->arcs_list[ARC_BUFC_DATA]; multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA];
multilist_t *meta_ml = state->arcs_list[ARC_BUFC_METADATA]; multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA];
int data_idx = multilist_get_random_index(data_ml); int data_idx = multilist_get_random_index(data_ml);
int meta_idx = multilist_get_random_index(meta_ml); int meta_idx = multilist_get_random_index(meta_ml);
multilist_sublist_t *data_mls; multilist_sublist_t *data_mls;
@ -7455,44 +7455,44 @@ arc_state_init(void)
arc_mfu_ghost = &ARC_mfu_ghost; arc_mfu_ghost = &ARC_mfu_ghost;
arc_l2c_only = &ARC_l2c_only; arc_l2c_only = &ARC_l2c_only;
arc_mru->arcs_list[ARC_BUFC_METADATA] = multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA],
multilist_create(sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func); arc_state_multilist_index_func);
arc_mru->arcs_list[ARC_BUFC_DATA] = multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA],
multilist_create(sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func); arc_state_multilist_index_func);
arc_mru_ghost->arcs_list[ARC_BUFC_METADATA] = multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA],
multilist_create(sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func); arc_state_multilist_index_func);
arc_mru_ghost->arcs_list[ARC_BUFC_DATA] = multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA],
multilist_create(sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func); arc_state_multilist_index_func);
arc_mfu->arcs_list[ARC_BUFC_METADATA] = multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA],
multilist_create(sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func); arc_state_multilist_index_func);
arc_mfu->arcs_list[ARC_BUFC_DATA] = multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA],
multilist_create(sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func); arc_state_multilist_index_func);
arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA] = multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA],
multilist_create(sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func); arc_state_multilist_index_func);
arc_mfu_ghost->arcs_list[ARC_BUFC_DATA] = multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
multilist_create(sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func); arc_state_multilist_index_func);
arc_l2c_only->arcs_list[ARC_BUFC_METADATA] = multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
multilist_create(sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func); arc_state_multilist_index_func);
arc_l2c_only->arcs_list[ARC_BUFC_DATA] = multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
multilist_create(sizeof (arc_buf_hdr_t), sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func); arc_state_multilist_index_func);
@ -7558,16 +7558,16 @@ arc_state_fini(void)
zfs_refcount_destroy(&arc_mfu_ghost->arcs_size); zfs_refcount_destroy(&arc_mfu_ghost->arcs_size);
zfs_refcount_destroy(&arc_l2c_only->arcs_size); zfs_refcount_destroy(&arc_l2c_only->arcs_size);
multilist_destroy(arc_mru->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(arc_mru->arcs_list[ARC_BUFC_DATA]); multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]); multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]); multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]); multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
aggsum_fini(&arc_meta_used); aggsum_fini(&arc_meta_used);
aggsum_fini(&arc_size); aggsum_fini(&arc_size);
@ -8624,16 +8624,16 @@ l2arc_sublist_lock(int list_num)
switch (list_num) { switch (list_num) {
case 0: case 0:
ml = arc_mfu->arcs_list[ARC_BUFC_METADATA]; ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA];
break; break;
case 1: case 1:
ml = arc_mru->arcs_list[ARC_BUFC_METADATA]; ml = &arc_mru->arcs_list[ARC_BUFC_METADATA];
break; break;
case 2: case 2:
ml = arc_mfu->arcs_list[ARC_BUFC_DATA]; ml = &arc_mfu->arcs_list[ARC_BUFC_DATA];
break; break;
case 3: case 3:
ml = arc_mru->arcs_list[ARC_BUFC_DATA]; ml = &arc_mru->arcs_list[ARC_BUFC_DATA];
break; break;
default: default:
return (NULL); return (NULL);

View File

@ -203,8 +203,8 @@ static boolean_t dbuf_evict_thread_exit;
* by those caches' matching enum values (from dbuf_cached_state_t). * by those caches' matching enum values (from dbuf_cached_state_t).
*/ */
typedef struct dbuf_cache { typedef struct dbuf_cache {
multilist_t *cache; multilist_t cache;
zfs_refcount_t size; zfs_refcount_t size ____cacheline_aligned;
} dbuf_cache_t; } dbuf_cache_t;
dbuf_cache_t dbuf_caches[DB_CACHE_MAX]; dbuf_cache_t dbuf_caches[DB_CACHE_MAX];
@ -667,9 +667,9 @@ dbuf_cache_above_lowater(void)
static void static void
dbuf_evict_one(void) dbuf_evict_one(void)
{ {
int idx = multilist_get_random_index(dbuf_caches[DB_DBUF_CACHE].cache); int idx = multilist_get_random_index(&dbuf_caches[DB_DBUF_CACHE].cache);
multilist_sublist_t *mls = multilist_sublist_lock( multilist_sublist_t *mls = multilist_sublist_lock(
dbuf_caches[DB_DBUF_CACHE].cache, idx); &dbuf_caches[DB_DBUF_CACHE].cache, idx);
ASSERT(!MUTEX_HELD(&dbuf_evict_lock)); ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
@ -833,8 +833,8 @@ retry:
dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0); dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);
for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) { for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
dbuf_caches[dcs].cache = multilist_create(&dbuf_caches[dcs].cache,
multilist_create(sizeof (dmu_buf_impl_t), sizeof (dmu_buf_impl_t),
offsetof(dmu_buf_impl_t, db_cache_link), offsetof(dmu_buf_impl_t, db_cache_link),
dbuf_cache_multilist_index_func); dbuf_cache_multilist_index_func);
zfs_refcount_create(&dbuf_caches[dcs].size); zfs_refcount_create(&dbuf_caches[dcs].size);
@ -901,7 +901,7 @@ dbuf_fini(void)
for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) { for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
zfs_refcount_destroy(&dbuf_caches[dcs].size); zfs_refcount_destroy(&dbuf_caches[dcs].size);
multilist_destroy(dbuf_caches[dcs].cache); multilist_destroy(&dbuf_caches[dcs].cache);
} }
if (dbuf_ksp != NULL) { if (dbuf_ksp != NULL) {
@ -2755,7 +2755,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
ASSERT(db->db_caching_status == DB_DBUF_CACHE || ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
db->db_caching_status == DB_DBUF_METADATA_CACHE); db->db_caching_status == DB_DBUF_METADATA_CACHE);
multilist_remove(dbuf_caches[db->db_caching_status].cache, db); multilist_remove(&dbuf_caches[db->db_caching_status].cache, db);
(void) zfs_refcount_remove_many( (void) zfs_refcount_remove_many(
&dbuf_caches[db->db_caching_status].size, &dbuf_caches[db->db_caching_status].size,
db->db.db_size, db); db->db.db_size, db);
@ -3465,7 +3465,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
ASSERT(db->db_caching_status == DB_DBUF_CACHE || ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
db->db_caching_status == DB_DBUF_METADATA_CACHE); db->db_caching_status == DB_DBUF_METADATA_CACHE);
multilist_remove(dbuf_caches[db->db_caching_status].cache, db); multilist_remove(&dbuf_caches[db->db_caching_status].cache, db);
(void) zfs_refcount_remove_many( (void) zfs_refcount_remove_many(
&dbuf_caches[db->db_caching_status].size, &dbuf_caches[db->db_caching_status].size,
db->db.db_size, db); db->db.db_size, db);
@ -3707,7 +3707,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE; DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE;
db->db_caching_status = dcs; db->db_caching_status = dcs;
multilist_insert(dbuf_caches[dcs].cache, db); multilist_insert(&dbuf_caches[dcs].cache, db);
size = zfs_refcount_add_many( size = zfs_refcount_add_many(
&dbuf_caches[dcs].size, &dbuf_caches[dcs].size,
db->db.db_size, db); db->db.db_size, db);

View File

@ -601,7 +601,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
os->os_zil = zil_alloc(os, &os->os_zil_header); os->os_zil = zil_alloc(os, &os->os_zil_header);
for (i = 0; i < TXG_SIZE; i++) { for (i = 0; i < TXG_SIZE; i++) {
os->os_dirty_dnodes[i] = multilist_create(sizeof (dnode_t), multilist_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
offsetof(dnode_t, dn_dirty_link[i]), offsetof(dnode_t, dn_dirty_link[i]),
dnode_multilist_index_func); dnode_multilist_index_func);
} }
@ -995,9 +995,8 @@ dmu_objset_evict_done(objset_t *os)
mutex_destroy(&os->os_obj_lock); mutex_destroy(&os->os_obj_lock);
mutex_destroy(&os->os_user_ptr_lock); mutex_destroy(&os->os_user_ptr_lock);
mutex_destroy(&os->os_upgrade_lock); mutex_destroy(&os->os_upgrade_lock);
for (int i = 0; i < TXG_SIZE; i++) { for (int i = 0; i < TXG_SIZE; i++)
multilist_destroy(os->os_dirty_dnodes[i]); multilist_destroy(&os->os_dirty_dnodes[i]);
}
spa_evicting_os_deregister(os->os_spa, os); spa_evicting_os_deregister(os->os_spa, os);
kmem_free(os, sizeof (objset_t)); kmem_free(os, sizeof (objset_t));
} }
@ -1520,7 +1519,7 @@ dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx)
* of why this dnode hold is always needed (even when not * of why this dnode hold is always needed (even when not
* doing user accounting). * doing user accounting).
*/ */
multilist_t *newlist = dn->dn_objset->os_synced_dnodes; multilist_t *newlist = &dn->dn_objset->os_synced_dnodes;
(void) dnode_add_ref(dn, newlist); (void) dnode_add_ref(dn, newlist);
multilist_insert(newlist, dn); multilist_insert(newlist, dn);
@ -1689,17 +1688,16 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
* dn_dirty_link[] of this txg. But it may already * dn_dirty_link[] of this txg. But it may already
* exist because we call dsl_dataset_sync() twice per txg. * exist because we call dsl_dataset_sync() twice per txg.
*/ */
if (os->os_synced_dnodes == NULL) { if (os->os_synced_dnodes.ml_sublists == NULL) {
os->os_synced_dnodes = multilist_create(&os->os_synced_dnodes, sizeof (dnode_t),
multilist_create(sizeof (dnode_t),
offsetof(dnode_t, dn_dirty_link[txgoff]), offsetof(dnode_t, dn_dirty_link[txgoff]),
dnode_multilist_index_func); dnode_multilist_index_func);
} else { } else {
ASSERT3U(os->os_synced_dnodes->ml_offset, ==, ASSERT3U(os->os_synced_dnodes.ml_offset, ==,
offsetof(dnode_t, dn_dirty_link[txgoff])); offsetof(dnode_t, dn_dirty_link[txgoff]));
} }
ml = os->os_dirty_dnodes[txgoff]; ml = &os->os_dirty_dnodes[txgoff];
num_sublists = multilist_get_num_sublists(ml); num_sublists = multilist_get_num_sublists(ml);
for (int i = 0; i < num_sublists; i++) { for (int i = 0; i < num_sublists; i++) {
if (multilist_sublist_is_empty_idx(ml, i)) if (multilist_sublist_is_empty_idx(ml, i))
@ -1738,7 +1736,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
boolean_t boolean_t
dmu_objset_is_dirty(objset_t *os, uint64_t txg) dmu_objset_is_dirty(objset_t *os, uint64_t txg)
{ {
return (!multilist_is_empty(os->os_dirty_dnodes[txg & TXG_MASK])); return (!multilist_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]));
} }
static file_info_cb_t *file_cbs[DMU_OST_NUMTYPES]; static file_info_cb_t *file_cbs[DMU_OST_NUMTYPES];
@ -1949,7 +1947,7 @@ userquota_updates_task(void *arg)
userquota_cache_t cache = { { 0 } }; userquota_cache_t cache = { { 0 } };
multilist_sublist_t *list = multilist_sublist_t *list =
multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx); multilist_sublist_lock(&os->os_synced_dnodes, uua->uua_sublist_idx);
ASSERT(multilist_sublist_head(list) == NULL || ASSERT(multilist_sublist_head(list) == NULL ||
dmu_objset_userused_enabled(os)); dmu_objset_userused_enabled(os));
@ -2006,7 +2004,7 @@ userquota_updates_task(void *arg)
mutex_exit(&dn->dn_mtx); mutex_exit(&dn->dn_mtx);
multilist_sublist_remove(list, dn); multilist_sublist_remove(list, dn);
dnode_rele(dn, os->os_synced_dnodes); dnode_rele(dn, &os->os_synced_dnodes);
} }
do_userquota_cacheflush(os, &cache, tx); do_userquota_cacheflush(os, &cache, tx);
multilist_sublist_unlock(list); multilist_sublist_unlock(list);
@ -2032,12 +2030,12 @@ dnode_rele_task(void *arg)
objset_t *os = uua->uua_os; objset_t *os = uua->uua_os;
multilist_sublist_t *list = multilist_sublist_t *list =
multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx); multilist_sublist_lock(&os->os_synced_dnodes, uua->uua_sublist_idx);
dnode_t *dn; dnode_t *dn;
while ((dn = multilist_sublist_head(list)) != NULL) { while ((dn = multilist_sublist_head(list)) != NULL) {
multilist_sublist_remove(list, dn); multilist_sublist_remove(list, dn);
dnode_rele(dn, os->os_synced_dnodes); dnode_rele(dn, &os->os_synced_dnodes);
} }
multilist_sublist_unlock(list); multilist_sublist_unlock(list);
kmem_free(uua, sizeof (*uua)); kmem_free(uua, sizeof (*uua));
@ -2093,7 +2091,7 @@ dmu_objset_sync_done(objset_t *os, dmu_tx_t *tx)
{ {
boolean_t need_userquota = dmu_objset_do_userquota_updates_prep(os, tx); boolean_t need_userquota = dmu_objset_do_userquota_updates_prep(os, tx);
int num_sublists = multilist_get_num_sublists(os->os_synced_dnodes); int num_sublists = multilist_get_num_sublists(&os->os_synced_dnodes);
for (int i = 0; i < num_sublists; i++) { for (int i = 0; i < num_sublists; i++) {
userquota_updates_arg_t *uua = userquota_updates_arg_t *uua =
kmem_alloc(sizeof (*uua), KM_SLEEP); kmem_alloc(sizeof (*uua), KM_SLEEP);

View File

@ -1671,7 +1671,7 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
*/ */
dmu_objset_userquota_get_ids(dn, B_TRUE, tx); dmu_objset_userquota_get_ids(dn, B_TRUE, tx);
multilist_t *dirtylist = os->os_dirty_dnodes[txg & TXG_MASK]; multilist_t *dirtylist = &os->os_dirty_dnodes[txg & TXG_MASK];
multilist_sublist_t *mls = multilist_sublist_lock_obj(dirtylist, dn); multilist_sublist_t *mls = multilist_sublist_lock_obj(dirtylist, dn);
/* /*

View File

@ -2267,8 +2267,7 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
dsl_bookmark_sync_done(ds, tx); dsl_bookmark_sync_done(ds, tx);
multilist_destroy(os->os_synced_dnodes); multilist_destroy(&os->os_synced_dnodes);
os->os_synced_dnodes = NULL;
if (os->os_encrypted) if (os->os_encrypted)
os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE; os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;

View File

@ -568,8 +568,7 @@ dsl_pool_sync_mos(dsl_pool_t *dp, dmu_tx_t *tx)
VERIFY0(zio_wait(zio)); VERIFY0(zio_wait(zio));
dmu_objset_sync_done(dp->dp_meta_objset, tx); dmu_objset_sync_done(dp->dp_meta_objset, tx);
taskq_wait(dp->dp_sync_taskq); taskq_wait(dp->dp_sync_taskq);
multilist_destroy(dp->dp_meta_objset->os_synced_dnodes); multilist_destroy(&dp->dp_meta_objset->os_synced_dnodes);
dp->dp_meta_objset->os_synced_dnodes = NULL;
dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", "");
spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);

View File

@ -416,7 +416,7 @@ metaslab_class_create(spa_t *spa, metaslab_ops_t *ops)
mc->mc_spa = spa; mc->mc_spa = spa;
mc->mc_ops = ops; mc->mc_ops = ops;
mutex_init(&mc->mc_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&mc->mc_lock, NULL, MUTEX_DEFAULT, NULL);
mc->mc_metaslab_txg_list = multilist_create(sizeof (metaslab_t), multilist_create(&mc->mc_metaslab_txg_list, sizeof (metaslab_t),
offsetof(metaslab_t, ms_class_txg_node), metaslab_idx_func); offsetof(metaslab_t, ms_class_txg_node), metaslab_idx_func);
for (int i = 0; i < spa->spa_alloc_count; i++) { for (int i = 0; i < spa->spa_alloc_count; i++) {
metaslab_class_allocator_t *mca = &mc->mc_allocator[i]; metaslab_class_allocator_t *mca = &mc->mc_allocator[i];
@ -443,7 +443,7 @@ metaslab_class_destroy(metaslab_class_t *mc)
zfs_refcount_destroy(&mca->mca_alloc_slots); zfs_refcount_destroy(&mca->mca_alloc_slots);
} }
mutex_destroy(&mc->mc_lock); mutex_destroy(&mc->mc_lock);
multilist_destroy(mc->mc_metaslab_txg_list); multilist_destroy(&mc->mc_metaslab_txg_list);
kmem_free(mc, offsetof(metaslab_class_t, kmem_free(mc, offsetof(metaslab_class_t,
mc_allocator[spa->spa_alloc_count])); mc_allocator[spa->spa_alloc_count]));
} }
@ -639,7 +639,7 @@ metaslab_class_expandable_space(metaslab_class_t *mc)
void void
metaslab_class_evict_old(metaslab_class_t *mc, uint64_t txg) metaslab_class_evict_old(metaslab_class_t *mc, uint64_t txg)
{ {
multilist_t *ml = mc->mc_metaslab_txg_list; multilist_t *ml = &mc->mc_metaslab_txg_list;
for (int i = 0; i < multilist_get_num_sublists(ml); i++) { for (int i = 0; i < multilist_get_num_sublists(ml); i++) {
multilist_sublist_t *mls = multilist_sublist_lock(ml, i); multilist_sublist_t *mls = multilist_sublist_lock(ml, i);
metaslab_t *msp = multilist_sublist_head(mls); metaslab_t *msp = multilist_sublist_head(mls);
@ -1139,7 +1139,7 @@ metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp)
metaslab_class_t *mc = msp->ms_group->mg_class; metaslab_class_t *mc = msp->ms_group->mg_class;
multilist_sublist_t *mls = multilist_sublist_t *mls =
multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
if (multilist_link_active(&msp->ms_class_txg_node)) if (multilist_link_active(&msp->ms_class_txg_node))
multilist_sublist_remove(mls, msp); multilist_sublist_remove(mls, msp);
multilist_sublist_unlock(mls); multilist_sublist_unlock(mls);
@ -2175,20 +2175,20 @@ metaslab_potentially_evict(metaslab_class_t *mc)
uint64_t size = spl_kmem_cache_entry_size(zfs_btree_leaf_cache); uint64_t size = spl_kmem_cache_entry_size(zfs_btree_leaf_cache);
int tries = 0; int tries = 0;
for (; allmem * zfs_metaslab_mem_limit / 100 < inuse * size && for (; allmem * zfs_metaslab_mem_limit / 100 < inuse * size &&
tries < multilist_get_num_sublists(mc->mc_metaslab_txg_list) * 2; tries < multilist_get_num_sublists(&mc->mc_metaslab_txg_list) * 2;
tries++) { tries++) {
unsigned int idx = multilist_get_random_index( unsigned int idx = multilist_get_random_index(
mc->mc_metaslab_txg_list); &mc->mc_metaslab_txg_list);
multilist_sublist_t *mls = multilist_sublist_t *mls =
multilist_sublist_lock(mc->mc_metaslab_txg_list, idx); multilist_sublist_lock(&mc->mc_metaslab_txg_list, idx);
metaslab_t *msp = multilist_sublist_head(mls); metaslab_t *msp = multilist_sublist_head(mls);
multilist_sublist_unlock(mls); multilist_sublist_unlock(mls);
while (msp != NULL && allmem * zfs_metaslab_mem_limit / 100 < while (msp != NULL && allmem * zfs_metaslab_mem_limit / 100 <
inuse * size) { inuse * size) {
VERIFY3P(mls, ==, multilist_sublist_lock( VERIFY3P(mls, ==, multilist_sublist_lock(
mc->mc_metaslab_txg_list, idx)); &mc->mc_metaslab_txg_list, idx));
ASSERT3U(idx, ==, ASSERT3U(idx, ==,
metaslab_idx_func(mc->mc_metaslab_txg_list, msp)); metaslab_idx_func(&mc->mc_metaslab_txg_list, msp));
if (!multilist_link_active(&msp->ms_class_txg_node)) { if (!multilist_link_active(&msp->ms_class_txg_node)) {
multilist_sublist_unlock(mls); multilist_sublist_unlock(mls);
@ -2535,7 +2535,7 @@ metaslab_unload(metaslab_t *msp)
if (msp->ms_group != NULL) { if (msp->ms_group != NULL) {
metaslab_class_t *mc = msp->ms_group->mg_class; metaslab_class_t *mc = msp->ms_group->mg_class;
multilist_sublist_t *mls = multilist_sublist_t *mls =
multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
if (multilist_link_active(&msp->ms_class_txg_node)) if (multilist_link_active(&msp->ms_class_txg_node))
multilist_sublist_remove(mls, msp); multilist_sublist_remove(mls, msp);
multilist_sublist_unlock(mls); multilist_sublist_unlock(mls);
@ -2600,7 +2600,7 @@ metaslab_set_selected_txg(metaslab_t *msp, uint64_t txg)
ASSERT(MUTEX_HELD(&msp->ms_lock)); ASSERT(MUTEX_HELD(&msp->ms_lock));
metaslab_class_t *mc = msp->ms_group->mg_class; metaslab_class_t *mc = msp->ms_group->mg_class;
multilist_sublist_t *mls = multilist_sublist_t *mls =
multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
if (multilist_link_active(&msp->ms_class_txg_node)) if (multilist_link_active(&msp->ms_class_txg_node))
multilist_sublist_remove(mls, msp); multilist_sublist_remove(mls, msp);
msp->ms_selected_txg = txg; msp->ms_selected_txg = txg;
@ -5682,7 +5682,7 @@ metaslab_claim_concrete(vdev_t *vd, uint64_t offset, uint64_t size,
if (spa_writeable(spa)) { /* don't dirty if we're zdb(8) */ if (spa_writeable(spa)) { /* don't dirty if we're zdb(8) */
metaslab_class_t *mc = msp->ms_group->mg_class; metaslab_class_t *mc = msp->ms_group->mg_class;
multilist_sublist_t *mls = multilist_sublist_t *mls =
multilist_sublist_lock_obj(mc->mc_metaslab_txg_list, msp); multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
if (!multilist_link_active(&msp->ms_class_txg_node)) { if (!multilist_link_active(&msp->ms_class_txg_node)) {
msp->ms_selected_txg = txg; msp->ms_selected_txg = txg;
multilist_sublist_insert_head(mls, msp); multilist_sublist_insert_head(mls, msp);

View File

@ -68,8 +68,8 @@ multilist_d2l(multilist_t *ml, void *obj)
* requirement, but a general rule of thumb in order to garner the * requirement, but a general rule of thumb in order to garner the
* best multi-threaded performance out of the data structure. * best multi-threaded performance out of the data structure.
*/ */
static multilist_t * static void
multilist_create_impl(size_t size, size_t offset, multilist_create_impl(multilist_t *ml, size_t size, size_t offset,
unsigned int num, multilist_sublist_index_func_t *index_func) unsigned int num, multilist_sublist_index_func_t *index_func)
{ {
ASSERT3U(size, >, 0); ASSERT3U(size, >, 0);
@ -77,7 +77,6 @@ multilist_create_impl(size_t size, size_t offset,
ASSERT3U(num, >, 0); ASSERT3U(num, >, 0);
ASSERT3P(index_func, !=, NULL); ASSERT3P(index_func, !=, NULL);
multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP);
ml->ml_offset = offset; ml->ml_offset = offset;
ml->ml_num_sublists = num; ml->ml_num_sublists = num;
ml->ml_index_func = index_func; ml->ml_index_func = index_func;
@ -92,7 +91,6 @@ multilist_create_impl(size_t size, size_t offset,
mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL); mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
list_create(&mls->mls_list, size, offset); list_create(&mls->mls_list, size, offset);
} }
return (ml);
} }
/* /*
@ -103,8 +101,8 @@ multilist_create_impl(size_t size, size_t offset,
* reserve the RAM necessary to create the extra slots for additional CPUs up * reserve the RAM necessary to create the extra slots for additional CPUs up
* front, and dynamically adding them is a complex task. * front, and dynamically adding them is a complex task.
*/ */
multilist_t * void
multilist_create(size_t size, size_t offset, multilist_create(multilist_t *ml, size_t size, size_t offset,
multilist_sublist_index_func_t *index_func) multilist_sublist_index_func_t *index_func)
{ {
int num_sublists; int num_sublists;
@ -115,7 +113,7 @@ multilist_create(size_t size, size_t offset,
num_sublists = MAX(boot_ncpus, 4); num_sublists = MAX(boot_ncpus, 4);
} }
return (multilist_create_impl(size, offset, num_sublists, index_func)); multilist_create_impl(ml, size, offset, num_sublists, index_func);
} }
/* /*
@ -141,7 +139,7 @@ multilist_destroy(multilist_t *ml)
ml->ml_num_sublists = 0; ml->ml_num_sublists = 0;
ml->ml_offset = 0; ml->ml_offset = 0;
kmem_free(ml, sizeof (multilist_t)); ml->ml_sublists = NULL;
} }
/* /*